
    <h                     x   S SK Jr  S SKJr  S SKJr  SSKJrJrJ	r	J
r
Jr  SSKJrJrJrJrJrJr  SSKJr  \R*                  " \5      r " S	 S
\5      r " S S\5      r " S S\5      r " S S\5      r " S S\	5      r " S S\5      r " S S\5      r " S S\
5      r " S S\5      r  " S S\5      r!/ SQr"g)   )CausalLMOutputWithPast)Unpack)logging   )DeepseekV3DecoderLayerDeepseekV3MLPDeepseekV3MoEDeepseekV3PreTrainedModelDeepseekV3TopkRouter)Qwen3AttentionQwen3ForCausalLM
Qwen3ModelQwen3RMSNormQwen3RotaryEmbeddingTransformersKwargs   )Dots1Configc                       \ rS rSrSrg)Dots1RMSNorm'    N__name__
__module____qualname____firstlineno____static_attributes__r       _/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/dots1/modular_dots1.pyr   r   '       r   r   c                       \ rS rSrSrg)Dots1RotaryEmbedding+   r   Nr   r   r   r   r"   r"   +   r    r   r"   c                       \ rS rSrSrg)Dots1Attention/   r   Nr   r   r   r   r%   r%   /   r    r   r%   c                       \ rS rSrSrg)Dots1MLP3   r   Nr   r   r   r   r(   r(   3   r    r   r(   c                       \ rS rSrSrg)Dots1MoE7   r   Nr   r   r   r   r+   r+   7   r    r   r+   c                       \ rS rSrSrg)Dots1TopkRouter;   r   Nr   r   r   r   r.   r.   ;   r    r   r.   c                   4   ^  \ rS rSrS\S\4U 4S jjrSrU =r$ )Dots1DecoderLayer?   config	layer_idxc                 J   > [         TU ]  5         UR                  U   U l        g )N)super__init__layer_typesattention_type)selfr3   r4   	__class__s      r   r7   Dots1DecoderLayer.__init__@   s!    $00;r   )r9   )	r   r   r   r   r   intr7   r   __classcell__r;   s   @r   r1   r1   ?   s    <{ <s < <r   r1   c                       \ rS rSrSrg)Dots1PreTrainedModelE   r   Nr   r   r   r   rA   rA   E   r    r   rA   c                       \ rS rSrSrg)
Dots1ModelI   r   Nr   r   r   r   rD   rD   I   r    r   rD   c                   :   ^  \ rS rSrS\\   S\4U 4S jjrSrU =r	$ )Dots1ForCausalLMM   super_kwargsreturnc                 $   > [         TU ]  " S0 UD6$ )a  
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
    config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
    (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

Example:

```python
>>> from transformers import AutoTokenizer, Dots1ForCausalLM

>>> model = Dots1ForCausalLM.from_pretrained("rednote-hilab/dots1.llm1.inst")
>>> tokenizer = AutoTokenizer.from_pretrained("rednote-hilab/dots1.llm1.inst")

>>> prompt = "Hey, are you conscious? Can you talk to me?"
>>> inputs = tokenizer(prompt, return_tensors="pt")

>>> # Generate
>>> generate_ids = model.generate(inputs.input_ids, max_length=30)
>>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
"Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
```r   )r6   forward)r:   rI   r;   s     r   rL   Dots1ForCausalLM.forwardN   s    4 w...r   r   )
r   r   r   r   r   r   r   rL   r   r>   r?   s   @r   rG   rG   M   s%    /12/ 
 / /r   rG   )rA   rD   rG   N)#modeling_outputsr   processing_utilsr   utilsr    deepseek_v3.modeling_deepseek_v3r   r   r	   r
   r   qwen3.modeling_qwen3r   r   r   r   r   r   configuration_dots1r   
get_loggerr   loggerr   r"   r%   r(   r+   r.   r1   rA   rD   rG   __all__r   r   r   <module>rW      s    7 &    - 
		H	%	< 		/ 		^ 		} 		} 		* 	<. <	4 		 	/' /<r   