
    Chw                        S SK Jr  S SKrS SKJrJr  S SKJr  \R                  " \	5      r
\(       a  S SKJrJrJr   S SKJr     S             S	S jjrg! \ a     N f = f)
    )annotationsN)TYPE_CHECKINGLiteral)save_or_push_to_hub_modelCrossEncoderSentenceTransformerSparseEncoder)OptimizationConfigc                  ^^^ SSK JnJnJn   SSKJn	Jn
  SSKJn  [        X5      =(       aA    [        U 5      =(       a/    [        U S   S5      =(       a    [        U S   R                  U	5      n[        X5      =(       aA    [        U 5      =(       a/    [        U S   S5      =(       a    [        U S   R                  U	5      n[        X5      =(       a    [        U R                  U	5      nU(       d  U(       d  U(       d  [        S5      eU(       d  U(       a  U S   R                  nOU R                  nU
R!                  U5      m[        T["        5      (       a7  TUR$                  ;  a  [        S5      eT=(       d    Tm['        UT5      " 5       mTc  S
m[)        UUU4S jSTUUUTSU S9	  g	! [         a    [        S5      ef = f)ao  
Export an optimized ONNX model from a SentenceTransformer, SparseEncoder, or CrossEncoder model.

The O1-O4 optimization levels are defined by Optimum and are documented here:
https://huggingface.co/docs/optimum/main/en/onnxruntime/usage_guides/optimization

The optimization levels are:

- O1: basic general optimizations.
- O2: basic and extended general optimizations, transformers-specific fusions.
- O3: same as O2 with GELU approximation.
- O4: same as O3 with mixed precision (fp16, GPU-only)

See the following pages for more information & benchmarks:

- `Sentence Transformer > Usage > Speeding up Inference <https://sbert.net/docs/sentence_transformer/usage/efficiency.html>`_
- `Cross Encoder > Usage > Speeding up Inference <https://sbert.net/docs/cross_encoder/usage/efficiency.html>`_

Args:
    model (SentenceTransformer | SparseEncoder | CrossEncoder): The SentenceTransformer, SparseEncoder,
        or CrossEncoder model to be optimized. Must be loaded with `backend="onnx"`.
    optimization_config (OptimizationConfig | Literal["O1", "O2", "O3", "O4"]): The optimization configuration or level.
    model_name_or_path (str): The path or Hugging Face Hub repository name where the optimized model will be saved.
    push_to_hub (bool, optional): Whether to push the optimized model to the Hugging Face Hub. Defaults to False.
    create_pr (bool, optional): Whether to create a pull request when pushing to the Hugging Face Hub. Defaults to False.
    file_suffix (str | None, optional): The suffix to add to the optimized model file name. Defaults to None.

Raises:
    ImportError: If the required packages `optimum` and `onnxruntime` are not installed.
    ValueError: If the provided model is not a valid SentenceTransformer, SparseEncoder, or CrossEncoder model loaded with `backend="onnx"`.
    ValueError: If the provided optimization_config is not valid.

Returns:
    None
r   r   )ORTModelORTOptimizer)AutoOptimizationConfigzPlease install Optimum and ONNX Runtime to use this function. You can install them with pip: `pip install optimum[onnxruntime]` or `pip install optimum[onnxruntime-gpu]`
auto_modelz}The model must be a Transformer-based SentenceTransformer, SparseEncoder, or CrossEncoder model loaded with `backend="onnx"`.z\optimization_config must be an OptimizationConfig instance or one of 'O1', 'O2', 'O3', 'O4'.N	optimizedc                &   > TR                  TU TS9$ )N)file_suffix)optimize)save_dirr   optimization_config	optimizers    ^/var/www/html/shao/venv/lib/python3.13/site-packages/sentence_transformers/backend/optimize.py<lambda>-export_optimized_onnx_model.<locals>.<lambda>o   s    ););<OQYgr);)s    export_optimized_onnx_modelonnx)	export_functionexport_function_nameconfigmodel_name_or_pathpush_to_hub	create_prr   backendmodel)sentence_transformersr   r	   r
   optimum.onnxruntimer   r   !optimum.onnxruntime.configurationr   ImportError
isinstancelenhasattrr   r%   
ValueErrorfrom_pretrainedstr_LEVELSgetattrr   )r%   r   r!   r"   r#   r   r   r	   r
   r   r   r   viable_st_modelviable_se_modelviable_ce_model	ort_modelr   s    `   `          @r   r   r      s   V WV
>L 	5. 	6J	6E!Hl+	6 uQx**H5	  	5( 	6J	6E!Hl+	6 uQx**H5	  !5[*U[[RZ:[O/ L
 	
 /#Ah11	#kk	,,Y7I%s++&<&D&DDn  "8%8%&<>QRT!s:"-
W  
8
 	

s   F/ /G)FFN)r%   z2SentenceTransformer | SparseEncoder | CrossEncoderr   z4OptimizationConfig | Literal['O1', 'O2', 'O3', 'O4']r!   r/   r"   boolr#   r6   r   z
str | NonereturnNone)
__future__r   loggingtypingr   r   #sentence_transformers.backend.utilsr   	getLogger__name__loggerr&   r   r	   r
   r(   r   r)   r    r   r   <module>rA      s    "  ) I			8	$VVH "e=eMe e 	e
 e e 
e	  s   A A$#A$