From 55df9d43a8ff5766f838b492c107338fe893e195 Mon Sep 17 00:00:00 2001
From: Max <naturale@hufs.ac.kr>
Date: Sat, 19 Jul 2025 15:34:52 +0900
Subject: [PATCH 01/15] feat: nmt draft

---
 .../ko/quantization/compressed_tensors.md     | 190 ++++++++++++++++++
 1 file changed, 190 insertions(+)
 create mode 100644 docs/source/ko/quantization/compressed_tensors.md
diff --git a/docs/source/ko/quantization/compressed_tensors.md b/docs/source/ko/quantization/compressed_tensors.md
new file mode 100644
index 000000000000..ca6b8a2ed0dc
--- /dev/null
+++ b/docs/source/ko/quantization/compressed_tensors.md
@@ -0,0 +1,190 @@
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+
+⚠️ Note that this file is in Markdown but contains specific syntax for our doc-builder (similar to MDX) that may not be
+rendered properly in your Markdown viewer.
+
+-->
+
+# compressed-tensors[[compressed-tensors]]
+
+[compressed-tensors](https://github.com/neuralmagic/compressed-tensors)는 [safetensors](https://github.com/huggingface/safetensors) 파일을 압축된 텐서 데이터 타입으로 확장해서, dense, int-quantized (int8), float-quantized (fp8), pack-quantized (int32로 패킹된 int4나 int8 가중치 양자화) 같은 다양한 양자화와 sparse 형식을 저장하고 불러올 수 있는 통합 체크포인트 형식을 제공해 줍니다.
+
+compressed-tensors는 [PEFT](https://huggingface.co/docs/peft)를 사용한 파인튜닝을 지원하고, 다음과 같은 기능들도 함께 제공합니다.
+
+- fp8, int4, int8 가중치와 활성화 정밀도를 지원합니다.
+- [tensor, channel, group, block, token](https://github.com/neuralmagic/compressed-tensors/blob/83b2e7a969d70606421a76b9a3d112646077c8de/src/compressed_tensors/quantization/quant_args.py#L43-L52)별로 양자화 스케일과 영점 전략을 설정할 수 있습니다.
+- 동적 토큰별 활성화 양자화 (또는 정적 전략)를 사용할 수 있습니다.
+- 가중치 sparsity (구조화되지 않은 형태나 2:4 같은 반구조화 형태)를 양자화와 함께 사용해서 더욱 강력한 압축이 가능합니다.
+- [nn.Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) 모듈뿐만 아니라 어떤 모듈이든 양자화할 수 있습니다.
+- 이름이나 클래스로 특정 모듈만 선택해서 지원합니다.
+
+[PyPI](https://pypi.org/project/compressed-tensors)에서 compressed-tensors를 설치해서 최신 안정 버전을 받으시거나 (추천), 소스에서 설치해서 최신 기능을 사용하실 수 있습니다.
+
+<hfoptions id="install">
+<hfoption id="PyPI">
+
+```bash
+pip install compressed-tensors
+```
+
+</hfoption>
+<hfoption id="source code">
+
+```bash
+git clone https://github.com/neuralmagic/compressed-tensors
+cd compressed-tensors
+pip install -e .
+```
+
+</hfoption>
+</hfoptions>
+
+Hugging Face Hub에서 호환되는 모델을 찾으시려면 compressed-tensors [태그](https://huggingface.co/models?other=compressed-tensors)로 검색해 보세요.
+
+현재는 이미 양자화된 모델만 불러올 수 있고, 모델을 불러온 후에는 저장할 수 없습니다. 모델을 compressed-tensors 형식으로 양자화하고 싶으시다면 [llm-compressor](https://github.com/vllm-project/llm-compressor)를 참고해 주세요. 또는 모델을 따로 만들어서 compressed-tensors 설정으로 직렬화하실 수도 있습니다.
+
+```python
+from transformers import AutoModelForCausalLM
+
+ct_model = AutoModelForCausalLM.from_pretrained("nm-testing/Meta-Llama-3.1-8B-Instruct-FP8-hf", device_map="auto")
+
+# 메모리 사용량 측정하기
+mem_params = sum([param.nelement()*param.element_size() for param in ct_model.parameters()])
+print(f"{mem_params/2**30:.4f} GB")
+# 8.4575 GB
+```
+
+## 모델 체크포인트[[model-checkpoint]]
+
+compressed-tensor 모델은 설정 항목을 통해 정의됩니다. 다음 예시는 [nm-testing/Meta-Llama-3.1-8B-Instruct-FP8-hf](https://huggingface.co/nm-testing/Meta-Llama-3.1-8B-Instruct-FP8-hf/blob/main/config.json) `config.json` 파일에서 가져왔습니다.
+
+압축하는 동안과 압축 후에 유연하게 표현할 수 있도록 많은 항목들이 있지만, 로딩과 추론을 위한 항목들은 주요 항목 몇 개만 보시면 충분합니다.
+
+```yaml
+"quantization_config": {
+  "config_groups": {
+    "group_0": {
+      "input_activations": {
+        "num_bits": 8,
+        "strategy": "tensor",
+        "type": "float"
+      },
+      "targets": ["Linear"],
+      "weights": {
+        "num_bits": 8,
+        "strategy": "tensor",
+        "type": "float"
+      }
+    }
+  },
+  "format": "naive-quantized",
+  "ignore": ["lm_head"],
+  "quant_method": "compressed-tensors",
+  "quantization_status": "frozen"
+},
+```
+
+설정 파일은 설정 그룹(`group_0`)의 양자화를 지정하는데, 정적 텐서별 전략으로 fp8에 가중치와 활성화 양자화를 포함합니다. `lm_head` 모듈은 `ignore` 키에 나와 있듯이 양자화하지 않습니다.
+
+모델 가중치를 더 자세히 보고 싶으시다면, 모델 카드의 [safetensors 뷰어](https://huggingface.co/nm-testing/Meta-Llama-3.1-8B-Instruct-FP8-hf?show_file_info=model.safetensors.index.json)를 사용해서 모든 [nn.Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) 모듈의 양자화된 가중치, 입력 스케일, 가중치 스케일을 확인하실 수 있습니다.
+
+| 텐서 | 형태 |	정밀도 |
+| ------- | ----- | --------- |
+model.layers.0.input_layernorm.weight	| [4 096]	| BF16 
+model.layers.0.mlp.down_proj.input_scale	| [1]	| BF16 
+model.layers.0.mlp.down_proj.weight	| [4 096, 14 336] |	F8_E4M3 
+model.layers.0.mlp.down_proj.weight_scale |	[1]	| BF16 
+model.layers.0.mlp.gate_proj.input_scale |	[1]	| BF16 
+model.layers.0.mlp.gate_proj.weight	| [14 336, 4 096]	| F8_E4M3 
+model.layers.0.mlp.gate_proj.weight_scale	| [1] |	BF16 
+model.layers.0.mlp.up_proj.input_scale|	[1]	|BF16 
+model.layers.0.mlp.up_proj.weight |	[14 336, 4 096]	| F8_E4M3 
+model.layers.0.mlp.up_proj.weight_scale | [1]	| BF16 
+model.layers.0.post_attention_layernorm.weight |	[4 096]	|BF16 
+model.layers.0.self_attn.k_proj.input_scale |	[1]	|  BF16
+model.layers.0.self_attn.k_proj.weight |	[1 024, 4 096]|	F8_E4M3
+model.layers.0.self_attn.k_proj.weight_scale |[1]	| BF16 
+model.layers.0.self_attn.o_proj.input_scale	| [1]	| BF16
+model.layers.0.self_attn.o_proj.weight | [4 096, 4 096]	| F8_E4M3 
+model.layers.0.self_attn.o_proj.weight_scale | [1]	| BF16 
+model.layers.0.self_attn.q_proj.input_scale	| [1]	| BF16 
+model.layers.0.self_attn.q_proj.weight | [4 096, 4 096]	| F8_E4M3 
+model.layers.0.self_attn.q_proj.weight_scale |	[1] | BF16 
+model.layers.0.self_attn.v_proj.input_scale	| [1] | BF16 
+model.layers.0.self_attn.v_proj.weight |	[1 024, 4 096]	| F8_E4M3 
+model.layers.0.self_attn.v_proj.weight_scale |	[1] |	BF16 
+
+[`~quantizers.HFQuantizer`] 통합으로 compressed-tensors 모델을 불러올 때, 양자화 설정에 지정된 모든 [nn.Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) 모듈은 압축된 가중치와 추론을 위한 순전파를 관리하는 [CompressedLinear](https://github.com/neuralmagic/compressed-tensors/blob/975cb223b19fcac2b98a4271d17668462d4d6e1d/src/compressed_tensors/linear/compressed_linear.py#L30) 모듈로 바뀝니다. `lm_head` 모듈은 여전히 양자화되지 않은 nn.Linear 모듈로 남아 있습니다.
+
+```python
+from transformers import AutoModelForCausalLM
+
+ct_model = AutoModelForCausalLM.from_pretrained("nm-testing/Meta-Llama-3.1-8B-Instruct-FP8-hf")
+print(ct_model)
+"""
+LlamaForCausalLM(
+  (model): LlamaModel(
+    (embed_tokens): Embedding(128256, 4096)
+    (layers): ModuleList(
+      (0-31): 32 x LlamaDecoderLayer(
+        (self_attn): LlamaSdpaAttention(
+          (q_proj): CompressedLinear(
+            in_features=4096, out_features=4096, bias=False
+            (input_observer): MovingAverageMinMaxObserver()
+            (weight_observer): MovingAverageMinMaxObserver()
+          )
+          (k_proj): CompressedLinear(
+            in_features=4096, out_features=1024, bias=False
+            (input_observer): MovingAverageMinMaxObserver()
+            (weight_observer): MovingAverageMinMaxObserver()
+          )
+          (v_proj): CompressedLinear(
+            in_features=4096, out_features=1024, bias=False
+            (input_observer): MovingAverageMinMaxObserver()
+            (weight_observer): MovingAverageMinMaxObserver()
+          )
+          (o_proj): CompressedLinear(
+            in_features=4096, out_features=4096, bias=False
+            (input_observer): MovingAverageMinMaxObserver()
+            (weight_observer): MovingAverageMinMaxObserver()
+          )
+          (rotary_emb): LlamaRotaryEmbedding()
+        )
+        (mlp): LlamaMLP(
+          (gate_proj): CompressedLinear(
+            in_features=4096, out_features=14336, bias=False
+            (input_observer): MovingAverageMinMaxObserver()
+            (weight_observer): MovingAverageMinMaxObserver()
+          )
+          (up_proj): CompressedLinear(
+            in_features=4096, out_features=14336, bias=False
+            (input_observer): MovingAverageMinMaxObserver()
+            (weight_observer): MovingAverageMinMaxObserver()
+          )
+          (down_proj): CompressedLinear(
+            in_features=14336, out_features=4096, bias=False
+            (input_observer): MovingAverageMinMaxObserver()
+            (weight_observer): MovingAverageMinMaxObserver()
+          )
+          (act_fn): SiLU()
+        )
+        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
+        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
+      )
+    )
+    (norm): LlamaRMSNorm((4096,), eps=1e-05)
+    (rotary_emb): LlamaRotaryEmbedding()
+  )
+  (lm_head): Linear(in_features=4096, out_features=128256, bias=False)
+)
+"""
+```

From 9ce029102d31a8f591d2f4fa769d5bab046adb65 Mon Sep 17 00:00:00 2001
From: Max <naturale@hufs.ac.kr>
Date: Sat, 19 Jul 2025 16:07:20 +0900
Subject: [PATCH 02/15] fix: improve translation accuracy and natural
 expressions

---
 .../ko/quantization/compressed_tensors.md     | 32 +++++++++----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/docs/source/ko/quantization/compressed_tensors.md b/docs/source/ko/quantization/compressed_tensors.md
index ca6b8a2ed0dc..ab4e21105a20 100644
--- a/docs/source/ko/quantization/compressed_tensors.md
+++ b/docs/source/ko/quantization/compressed_tensors.md
@@ -16,18 +16,18 @@ rendered properly in your Markdown viewer.
 
 # compressed-tensors[[compressed-tensors]]
 
-[compressed-tensors](https://github.com/neuralmagic/compressed-tensors)는 [safetensors](https://github.com/huggingface/safetensors) 파일을 압축된 텐서 데이터 타입으로 확장해서, dense, int-quantized (int8), float-quantized (fp8), pack-quantized (int32로 패킹된 int4나 int8 가중치 양자화) 같은 다양한 양자화와 sparse 형식을 저장하고 불러올 수 있는 통합 체크포인트 형식을 제공해 줍니다.
+[compressed-tensors](https://github.com/neuralmagic/compressed-tensors)는 [safetensors](https://github.com/huggingface/safetensors) 파일을 압축된 텐서 데이터 타입으로 확장해서, dense, int 양자화(int8), float 양자화(fp8), pack 양자화(int32로 패킹된 int4나 int8 가중치 양자화) 등 다양한 양자화·sparse 형식을 하나의 체크포인트 형식으로 저장하고 불러올 수 있게 합니다.
 
-compressed-tensors는 [PEFT](https://huggingface.co/docs/peft)를 사용한 파인튜닝을 지원하고, 다음과 같은 기능들도 함께 제공합니다.
+compressed-tensors는 [PEFT](https://huggingface.co/docs/peft)를 사용한 파인튜닝을 지원하며, 다음과 같은 기능들을 제공합니다.
 
-- fp8, int4, int8 가중치와 활성화 정밀도를 지원합니다.
-- [tensor, channel, group, block, token](https://github.com/neuralmagic/compressed-tensors/blob/83b2e7a969d70606421a76b9a3d112646077c8de/src/compressed_tensors/quantization/quant_args.py#L43-L52)별로 양자화 스케일과 영점 전략을 설정할 수 있습니다.
-- 동적 토큰별 활성화 양자화 (또는 정적 전략)를 사용할 수 있습니다.
-- 가중치 sparsity (구조화되지 않은 형태나 2:4 같은 반구조화 형태)를 양자화와 함께 사용해서 더욱 강력한 압축이 가능합니다.
-- [nn.Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) 모듈뿐만 아니라 어떤 모듈이든 양자화할 수 있습니다.
-- 이름이나 클래스로 특정 모듈만 선택해서 지원합니다.
+- fp8, int4, int8 가중치 및 활성화 정밀도.
+- [tensor, channel, group, block, token](https://github.com/neuralmagic/compressed-tensors/blob/83b2e7a969d70606421a76b9a3d112646077c8de/src/compressed_tensors/quantization/quant_args.py#L43-L52) 수준의 양자화 스케일과 영점 전략을 제공합니다.
+- 토큰별 동적 활성화 양자화(또는 정적 전략)를 지원합니다.
+- 구조화되지 않은 형태 또는 2:4와 같은 반구조화된 형태의 가중치 희소성을 양자화와 결합하여 극한의 압축을 달성할 수 있습니다.
+- [nn.Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) 모듈뿐만 아니라 어떤 모듈이든 양자화가 가능합니다.
+- 모듈 이름 또는 클래스별 양자화 대상을 지정할 수 있습니다.
 
-[PyPI](https://pypi.org/project/compressed-tensors)에서 compressed-tensors를 설치해서 최신 안정 버전을 받으시거나 (추천), 소스에서 설치해서 최신 기능을 사용하실 수 있습니다.
+최신 안정 버전은 [PyPI](https://pypi.org/project/compressed-tensors)에서 설치할 수 있습니다. 안정화되지 않은 최신 기능을 사용하려면 소스 코드를 이용해 설치하실 수 있습니다.
 
 <hfoptions id="install">
 <hfoption id="PyPI">
@@ -48,9 +48,9 @@ pip install -e .
 </hfoption>
 </hfoptions>
 
-Hugging Face Hub에서 호환되는 모델을 찾으시려면 compressed-tensors [태그](https://huggingface.co/models?other=compressed-tensors)로 검색해 보세요.
+compressed-tensors [태그](https://huggingface.co/models?other=compressed-tensors)를 사용하여 Hugging Face Hub에서 양자화된 모델을 찾을 수 있습니다. 
 
-현재는 이미 양자화된 모델만 불러올 수 있고, 모델을 불러온 후에는 저장할 수 없습니다. 모델을 compressed-tensors 형식으로 양자화하고 싶으시다면 [llm-compressor](https://github.com/vllm-project/llm-compressor)를 참고해 주세요. 또는 모델을 따로 만들어서 compressed-tensors 설정으로 직렬화하실 수도 있습니다.
+현재는 이미 양자화된 모델만 불러올 수 있고, 불러온 모델은 다시 저장할 수 없습니다. compressed-tensors 형식으로 모델을 양자화하려면 [llm-compressor](https://github.com/vllm-project/llm-compressor)를 참고해 주세요. 또는 모델을 직접 생성하고 compressed-tensors 설정으로 직렬화할 수도 있습니다.
 
 ```python
 from transformers import AutoModelForCausalLM
@@ -65,9 +65,9 @@ print(f"{mem_params/2**30:.4f} GB")
 
 ## 모델 체크포인트[[model-checkpoint]]
 
-compressed-tensor 모델은 설정 항목을 통해 정의됩니다. 다음 예시는 [nm-testing/Meta-Llama-3.1-8B-Instruct-FP8-hf](https://huggingface.co/nm-testing/Meta-Llama-3.1-8B-Instruct-FP8-hf/blob/main/config.json) `config.json` 파일에서 가져왔습니다.
+compressed-tensor 모델은 구성 항목을 통해 정의됩니다. 다음 예시는 [nm-testing/Meta-Llama-3.1-8B-Instruct-FP8-hf](https://huggingface.co/nm-testing/Meta-Llama-3.1-8B-Instruct-FP8-hf/blob/main/config.json) `config.json` 파일에서 가져온 것입니다.
 
-압축하는 동안과 압축 후에 유연하게 표현할 수 있도록 많은 항목들이 있지만, 로딩과 추론을 위한 항목들은 주요 항목 몇 개만 보시면 충분합니다.
+압축 전후의 유연한 표현을 위해 많은 항목이 존재하지만, 모델 불러오기와 추론에는 핵심 항목 몇 가지만 알아도 됩니다.
 
 ```yaml
 "quantization_config": {
@@ -93,9 +93,9 @@ compressed-tensor 모델은 설정 항목을 통해 정의됩니다. 다음 예
 },
 ```
 
-설정 파일은 설정 그룹(`group_0`)의 양자화를 지정하는데, 정적 텐서별 전략으로 fp8에 가중치와 활성화 양자화를 포함합니다. `lm_head` 모듈은 `ignore` 키에 나와 있듯이 양자화하지 않습니다.
+구성 파일은 구성 그룹(`group_0`)의 양자화를 지정하며, 정적 per-tensor 전략으로 가중치와 활성화를 fp8로 양자화합니다. `ignore` 키에 명시된 것처럼 `lm_head` 모듈은 양자화되지 않습니다.
 
-모델 가중치를 더 자세히 보고 싶으시다면, 모델 카드의 [safetensors 뷰어](https://huggingface.co/nm-testing/Meta-Llama-3.1-8B-Instruct-FP8-hf?show_file_info=model.safetensors.index.json)를 사용해서 모든 [nn.Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) 모듈의 양자화된 가중치, 입력 스케일, 가중치 스케일을 확인하실 수 있습니다.
+모델 가중치를 더 자세히 보려면, 모델 카드의 [safetensors 뷰어](https://huggingface.co/nm-testing/Meta-Llama-3.1-8B-Instruct-FP8-hf?show_file_info=model.safetensors.index.json)를 사용하여 모든 [nn.Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) 모듈의 양자화된 가중치, 입력 스케일, 가중치 스케일을 확인할 수 있습니다.
 
 | 텐서 | 형태 |	정밀도 |
 | ------- | ----- | --------- |
@@ -123,7 +123,7 @@ model.layers.0.self_attn.v_proj.input_scale	| [1] | BF16
 model.layers.0.self_attn.v_proj.weight |	[1 024, 4 096]	| F8_E4M3 
 model.layers.0.self_attn.v_proj.weight_scale |	[1] |	BF16 
 
-[`~quantizers.HFQuantizer`] 통합으로 compressed-tensors 모델을 불러올 때, 양자화 설정에 지정된 모든 [nn.Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) 모듈은 압축된 가중치와 추론을 위한 순전파를 관리하는 [CompressedLinear](https://github.com/neuralmagic/compressed-tensors/blob/975cb223b19fcac2b98a4271d17668462d4d6e1d/src/compressed_tensors/linear/compressed_linear.py#L30) 모듈로 바뀝니다. `lm_head` 모듈은 여전히 양자화되지 않은 nn.Linear 모듈로 남아 있습니다.
+compressed-tensors 모델을 [`~quantizers.HFQuantizer`] 통합으로 불러오면, 양자화 설정에 지정된 모든 [nn.Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) 모듈이 [CompressedLinear](https://github.com/neuralmagic/compressed-tensors/blob/975cb223b19fcac2b98a4271d17668462d4d6e1d/src/compressed_tensors/linear/compressed_linear.py#L30) 모듈로 대체되어 압축 가중치와 순전파를 관리합니다. `lm_head` 모듈은 여전히 양자화되지 않은 nn.Linear 모듈로 유지됩니다.
 
 ```python
 from transformers import AutoModelForCausalLM

From 513e2e25926278223d00bf1972200386300fa4b5 Mon Sep 17 00:00:00 2001
From: Max <naturale@hufs.ac.kr>
Date: Sat, 19 Jul 2025 16:13:19 +0900
Subject: [PATCH 03/15] fix: applied grammar check, improved naturalness

---
 docs/source/ko/quantization/compressed_tensors.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/source/ko/quantization/compressed_tensors.md b/docs/source/ko/quantization/compressed_tensors.md
index ab4e21105a20..18506eafcc61 100644
--- a/docs/source/ko/quantization/compressed_tensors.md
+++ b/docs/source/ko/quantization/compressed_tensors.md
@@ -16,7 +16,7 @@ rendered properly in your Markdown viewer.
 
 # compressed-tensors[[compressed-tensors]]
 
-[compressed-tensors](https://github.com/neuralmagic/compressed-tensors)는 [safetensors](https://github.com/huggingface/safetensors) 파일을 압축된 텐서 데이터 타입으로 확장해서, dense, int 양자화(int8), float 양자화(fp8), pack 양자화(int32로 패킹된 int4나 int8 가중치 양자화) 등 다양한 양자화·sparse 형식을 하나의 체크포인트 형식으로 저장하고 불러올 수 있게 합니다.
+[compressed-tensors](https://github.com/neuralmagic/compressed-tensors)는 [safetensors](https://github.com/huggingface/safetensors) 파일을 압축된 텐서 데이터 타입으로 확장해서, dense, int 양자화(int8), float 양자화(fp8), pack 양자화(int32로 패킹된 int4 또는 int8 가중치 양자화) 등 다양한 양자화·sparse 형식을 하나의 체크포인트 형식으로 저장하고 불러올 수 있게 합니다.
 
 compressed-tensors는 [PEFT](https://huggingface.co/docs/peft)를 사용한 파인튜닝을 지원하며, 다음과 같은 기능들을 제공합니다.
 
@@ -24,7 +24,7 @@ compressed-tensors는 [PEFT](https://huggingface.co/docs/peft)를 사용한 파
 - [tensor, channel, group, block, token](https://github.com/neuralmagic/compressed-tensors/blob/83b2e7a969d70606421a76b9a3d112646077c8de/src/compressed_tensors/quantization/quant_args.py#L43-L52) 수준의 양자화 스케일과 영점 전략을 제공합니다.
 - 토큰별 동적 활성화 양자화(또는 정적 전략)를 지원합니다.
 - 구조화되지 않은 형태 또는 2:4와 같은 반구조화된 형태의 가중치 희소성을 양자화와 결합하여 극한의 압축을 달성할 수 있습니다.
-- [nn.Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) 모듈뿐만 아니라 어떤 모듈이든 양자화가 가능합니다.
+- [nn.Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) 모듈뿐만 아니라 어떤 모듈이든 양자화할 수 있습니다.
 - 모듈 이름 또는 클래스별 양자화 대상을 지정할 수 있습니다.
 
 최신 안정 버전은 [PyPI](https://pypi.org/project/compressed-tensors)에서 설치할 수 있습니다. 안정화되지 않은 최신 기능을 사용하려면 소스 코드를 이용해 설치하실 수 있습니다.

From 457effe1e4bed3007d5d43dcf1e4b9a822ec8568 Mon Sep 17 00:00:00 2001
From: Max <naturale@hufs.ac.kr>
Date: Sat, 19 Jul 2025 16:15:45 +0900
Subject: [PATCH 04/15] fix: update toctree.yml

---
 docs/source/ko/_toctree.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/source/ko/_toctree.yml b/docs/source/ko/_toctree.yml
index 75632c1f59a5..ed1add258d03 100644
--- a/docs/source/ko/_toctree.yml
+++ b/docs/source/ko/_toctree.yml
@@ -147,6 +147,8 @@
     title: (번역중) Getting started
   - local: quantization/bitsandbytes
     title: bitsandbytes
+  - local: quantization/compressed_tensors
+    title: compressed-tensors
   - local: quantization/gptq
     title: GPTQ
   - local: quantization/awq

From 42a8e44a494bc3c44e000a7e3d963686532c9635 Mon Sep 17 00:00:00 2001
From: Max <naturale@hufs.ac.kr>
Date: Sun, 20 Jul 2025 13:10:42 +0900
Subject: [PATCH 05/15] fix: revised awkward words

---
 docs/source/ko/quantization/compressed_tensors.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/source/ko/quantization/compressed_tensors.md b/docs/source/ko/quantization/compressed_tensors.md
index 18506eafcc61..b942ba066198 100644
--- a/docs/source/ko/quantization/compressed_tensors.md
+++ b/docs/source/ko/quantization/compressed_tensors.md
@@ -20,9 +20,9 @@ rendered properly in your Markdown viewer.
 
 compressed-tensors는 [PEFT](https://huggingface.co/docs/peft)를 사용한 파인튜닝을 지원하며, 다음과 같은 기능들을 제공합니다.
 
-- fp8, int4, int8 가중치 및 활성화 정밀도.
+- fp8, int4, int8 가중치 및 활성화 함수 출력 정밀도.
 - [tensor, channel, group, block, token](https://github.com/neuralmagic/compressed-tensors/blob/83b2e7a969d70606421a76b9a3d112646077c8de/src/compressed_tensors/quantization/quant_args.py#L43-L52) 수준의 양자화 스케일과 영점 전략을 제공합니다.
-- 토큰별 동적 활성화 양자화(또는 정적 전략)를 지원합니다.
+- 토큰별 동적 활성화 함수 기반 양자화(또는 정적 전략)를 지원합니다.
 - 구조화되지 않은 형태 또는 2:4와 같은 반구조화된 형태의 가중치 희소성을 양자화와 결합하여 극한의 압축을 달성할 수 있습니다.
 - [nn.Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) 모듈뿐만 아니라 어떤 모듈이든 양자화할 수 있습니다.
 - 모듈 이름 또는 클래스별 양자화 대상을 지정할 수 있습니다.
@@ -93,7 +93,7 @@ compressed-tensor 모델은 구성 항목을 통해 정의됩니다. 다음 예
 },
 ```
 
-구성 파일은 구성 그룹(`group_0`)의 양자화를 지정하며, 정적 per-tensor 전략으로 가중치와 활성화를 fp8로 양자화합니다. `ignore` 키에 명시된 것처럼 `lm_head` 모듈은 양자화되지 않습니다.
+구성 파일은 구성 그룹(`group_0`)의 양자화를 지정하며, 정적 per-tensor 전략으로 가중치와 활성화 함수 기반 양자화를 fp8로 양자화합니다. `ignore` 키에 명시된 것처럼 `lm_head` 모듈은 양자화되지 않습니다.
 
 모델 가중치를 더 자세히 보려면, 모델 카드의 [safetensors 뷰어](https://huggingface.co/nm-testing/Meta-Llama-3.1-8B-Instruct-FP8-hf?show_file_info=model.safetensors.index.json)를 사용하여 모든 [nn.Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) 모듈의 양자화된 가중치, 입력 스케일, 가중치 스케일을 확인할 수 있습니다.
 

From eeb22587a4a700ecdaa0c3eea2157b5e6070b835 Mon Sep 17 00:00:00 2001
From: Max <naturale@hufs.ac.kr>
Date: Sun, 20 Jul 2025 13:13:23 +0900
Subject: [PATCH 06/15] fix: revised sentence in line96

---
 docs/source/ko/quantization/compressed_tensors.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/ko/quantization/compressed_tensors.md b/docs/source/ko/quantization/compressed_tensors.md
index b942ba066198..f9a30be700d0 100644
--- a/docs/source/ko/quantization/compressed_tensors.md
+++ b/docs/source/ko/quantization/compressed_tensors.md
@@ -93,7 +93,7 @@ compressed-tensor 모델은 구성 항목을 통해 정의됩니다. 다음 예
 },
 ```
 
-구성 파일은 구성 그룹(`group_0`)의 양자화를 지정하며, 정적 per-tensor 전략으로 가중치와 활성화 함수 기반 양자화를 fp8로 양자화합니다. `ignore` 키에 명시된 것처럼 `lm_head` 모듈은 양자화되지 않습니다.
+구성 파일은 구성 그룹(`group_0`)에 대해 정적 per-tensor 전략으로 가중치와 활성화 함수 기반 값을 fp8로 양자화하도록 지정합니다. `ignore` 키에 명시된 것처럼 `lm_head` 모듈은 양자화되지 않습니다.
 
 모델 가중치를 더 자세히 보려면, 모델 카드의 [safetensors 뷰어](https://huggingface.co/nm-testing/Meta-Llama-3.1-8B-Instruct-FP8-hf?show_file_info=model.safetensors.index.json)를 사용하여 모든 [nn.Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) 모듈의 양자화된 가중치, 입력 스케일, 가중치 스케일을 확인할 수 있습니다.
 

From c38688d761255b7f369fd7c25e229900638f0976 Mon Sep 17 00:00:00 2001
From: MaCAT <138701551+maximizemaxwell@users.noreply.github.com>
Date: Tue, 22 Jul 2025 01:20:11 +0900
Subject: [PATCH 07/15] Update compressed_tensors.md

Co-authored-by: Harheem Kim <49297157+harheem@users.noreply.github.com>
---
 docs/source/ko/quantization/compressed_tensors.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/ko/quantization/compressed_tensors.md b/docs/source/ko/quantization/compressed_tensors.md
index f9a30be700d0..1350a1d166d0 100644
--- a/docs/source/ko/quantization/compressed_tensors.md
+++ b/docs/source/ko/quantization/compressed_tensors.md
@@ -18,7 +18,7 @@ rendered properly in your Markdown viewer.
 
 [compressed-tensors](https://github.com/neuralmagic/compressed-tensors)는 [safetensors](https://github.com/huggingface/safetensors) 파일을 압축된 텐서 데이터 타입으로 확장해서, dense, int 양자화(int8), float 양자화(fp8), pack 양자화(int32로 패킹된 int4 또는 int8 가중치 양자화) 등 다양한 양자화·sparse 형식을 하나의 체크포인트 형식으로 저장하고 불러올 수 있게 합니다.
 
-compressed-tensors는 [PEFT](https://huggingface.co/docs/peft)를 사용한 파인튜닝을 지원하며, 다음과 같은 기능들을 제공합니다.
+compressed-tensors는 [PEFT](https://huggingface.co/docs/peft)를 사용한 미세 조정을 지원하며, 다음과 같은 기능들을 제공합니다.
 
 - fp8, int4, int8 가중치 및 활성화 함수 출력 정밀도.
 - [tensor, channel, group, block, token](https://github.com/neuralmagic/compressed-tensors/blob/83b2e7a969d70606421a76b9a3d112646077c8de/src/compressed_tensors/quantization/quant_args.py#L43-L52) 수준의 양자화 스케일과 영점 전략을 제공합니다.

From 1c9886400372dc40a41fdc0c6d431a103fd13c71 Mon Sep 17 00:00:00 2001
From: MaCAT <138701551+maximizemaxwell@users.noreply.github.com>
Date: Tue, 22 Jul 2025 01:20:31 +0900
Subject: [PATCH 08/15] Update compressed_tensors.md

Co-authored-by: Harheem Kim <49297157+harheem@users.noreply.github.com>
---
 docs/source/ko/quantization/compressed_tensors.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/ko/quantization/compressed_tensors.md b/docs/source/ko/quantization/compressed_tensors.md
index 1350a1d166d0..e51c4506c6ee 100644
--- a/docs/source/ko/quantization/compressed_tensors.md
+++ b/docs/source/ko/quantization/compressed_tensors.md
@@ -65,7 +65,7 @@ print(f"{mem_params/2**30:.4f} GB")
 
 ## 모델 체크포인트[[model-checkpoint]]
 
-compressed-tensor 모델은 구성 항목을 통해 정의됩니다. 다음 예시는 [nm-testing/Meta-Llama-3.1-8B-Instruct-FP8-hf](https://huggingface.co/nm-testing/Meta-Llama-3.1-8B-Instruct-FP8-hf/blob/main/config.json) `config.json` 파일에서 가져온 것입니다.
+compressed-tensor 모델은 설정 항목을 통해 정의됩니다. 다음 예시는 [nm-testing/Meta-Llama-3.1-8B-Instruct-FP8-hf](https://huggingface.co/nm-testing/Meta-Llama-3.1-8B-Instruct-FP8-hf/blob/main/config.json) `config.json` 파일에서 가져온 것입니다.
 
 압축 전후의 유연한 표현을 위해 많은 항목이 존재하지만, 모델 불러오기와 추론에는 핵심 항목 몇 가지만 알아도 됩니다.
 

From 8409f2402e80187871de48d6f5bdea3f1aff75e0 Mon Sep 17 00:00:00 2001
From: MaCAT <138701551+maximizemaxwell@users.noreply.github.com>
Date: Tue, 22 Jul 2025 01:20:57 +0900
Subject: [PATCH 09/15] Update compressed_tensors.md

Co-authored-by: Harheem Kim <49297157+harheem@users.noreply.github.com>
---
 docs/source/ko/quantization/compressed_tensors.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/ko/quantization/compressed_tensors.md b/docs/source/ko/quantization/compressed_tensors.md
index e51c4506c6ee..cb4a7a9f9e16 100644
--- a/docs/source/ko/quantization/compressed_tensors.md
+++ b/docs/source/ko/quantization/compressed_tensors.md
@@ -23,7 +23,7 @@ compressed-tensors는 [PEFT](https://huggingface.co/docs/peft)를 사용한 미
 - fp8, int4, int8 가중치 및 활성화 함수 출력 정밀도.
 - [tensor, channel, group, block, token](https://github.com/neuralmagic/compressed-tensors/blob/83b2e7a969d70606421a76b9a3d112646077c8de/src/compressed_tensors/quantization/quant_args.py#L43-L52) 수준의 양자화 스케일과 영점 전략을 제공합니다.
 - 토큰별 동적 활성화 함수 기반 양자화(또는 정적 전략)를 지원합니다.
-- 구조화되지 않은 형태 또는 2:4와 같은 반구조화된 형태의 가중치 희소성을 양자화와 결합하여 극한의 압축을 달성할 수 있습니다.
+- 비정형 또는 2:4와 같은 반정형 가중치 희소성을 양자화와 결합하여 압축률 극대화
 - [nn.Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) 모듈뿐만 아니라 어떤 모듈이든 양자화할 수 있습니다.
 - 모듈 이름 또는 클래스별 양자화 대상을 지정할 수 있습니다.
 

From 5bea391c70b0e56d234b8c2da100aa25bbfdb924 Mon Sep 17 00:00:00 2001
From: MaCAT <138701551+maximizemaxwell@users.noreply.github.com>
Date: Tue, 22 Jul 2025 01:21:02 +0900
Subject: [PATCH 10/15] Update compressed_tensors.md

Co-authored-by: Harheem Kim <49297157+harheem@users.noreply.github.com>
---
 docs/source/ko/quantization/compressed_tensors.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/ko/quantization/compressed_tensors.md b/docs/source/ko/quantization/compressed_tensors.md
index cb4a7a9f9e16..fd2969dbcb0c 100644
--- a/docs/source/ko/quantization/compressed_tensors.md
+++ b/docs/source/ko/quantization/compressed_tensors.md
@@ -24,7 +24,7 @@ compressed-tensors는 [PEFT](https://huggingface.co/docs/peft)를 사용한 미
 - [tensor, channel, group, block, token](https://github.com/neuralmagic/compressed-tensors/blob/83b2e7a969d70606421a76b9a3d112646077c8de/src/compressed_tensors/quantization/quant_args.py#L43-L52) 수준의 양자화 스케일과 영점 전략을 제공합니다.
 - 토큰별 동적 활성화 함수 기반 양자화(또는 정적 전략)를 지원합니다.
 - 비정형 또는 2:4와 같은 반정형 가중치 희소성을 양자화와 결합하여 압축률 극대화
-- [nn.Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) 모듈뿐만 아니라 어떤 모듈이든 양자화할 수 있습니다.
+- [nn.Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) 모듈뿐만 아닌 임의의 모듈 양자화
 - 모듈 이름 또는 클래스별 양자화 대상을 지정할 수 있습니다.
 
 최신 안정 버전은 [PyPI](https://pypi.org/project/compressed-tensors)에서 설치할 수 있습니다. 안정화되지 않은 최신 기능을 사용하려면 소스 코드를 이용해 설치하실 수 있습니다.

From 40372911355b361d89f33d3378b3846e0ccc5212 Mon Sep 17 00:00:00 2001
From: MaCAT <138701551+maximizemaxwell@users.noreply.github.com>
Date: Thu, 31 Jul 2025 21:45:32 +0900
Subject: [PATCH 11/15] =?UTF-8?q?compressed=5Ftensors.md=20=EC=97=85?=
 =?UTF-8?q?=EB=8D=B0=EC=9D=B4=ED=8A=B8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Woojun Jung <46880056+jungnerd@users.noreply.github.com>
---
 docs/source/ko/quantization/compressed_tensors.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/ko/quantization/compressed_tensors.md b/docs/source/ko/quantization/compressed_tensors.md
index fd2969dbcb0c..c9c7c82a9c3f 100644
--- a/docs/source/ko/quantization/compressed_tensors.md
+++ b/docs/source/ko/quantization/compressed_tensors.md
@@ -16,7 +16,7 @@ rendered properly in your Markdown viewer.
 
 # compressed-tensors[[compressed-tensors]]
 
-[compressed-tensors](https://github.com/neuralmagic/compressed-tensors)는 [safetensors](https://github.com/huggingface/safetensors) 파일을 압축된 텐서 데이터 타입으로 확장해서, dense, int 양자화(int8), float 양자화(fp8), pack 양자화(int32로 패킹된 int4 또는 int8 가중치 양자화) 등 다양한 양자화·sparse 형식을 하나의 체크포인트 형식으로 저장하고 불러올 수 있게 합니다.
+[compressed-tensors](https://github.com/neuralmagic/compressed-tensors)는 [safetensors](https://github.com/huggingface/safetensors) 파일을 압축된 텐서 데이터 타입으로 확장해서, dense, int 양자화(int8), float 양자화(fp8), pack 양자화(int32로 패킹된 int4 또는 int8 가중치 양자화) 등 다양한 양자화 및 희소성 형식을 하나의 체크포인트 형식으로 저장하고 불러올 수 있게 합니다.
 
 compressed-tensors는 [PEFT](https://huggingface.co/docs/peft)를 사용한 미세 조정을 지원하며, 다음과 같은 기능들을 제공합니다.
 

From cf0a8dad14acdcab95c8b69d0b48957663f422e5 Mon Sep 17 00:00:00 2001
From: MaCAT <138701551+maximizemaxwell@users.noreply.github.com>
Date: Thu, 31 Jul 2025 21:45:38 +0900
Subject: [PATCH 12/15] =?UTF-8?q?compressed=5Ftensors.md=20=EC=97=85?=
 =?UTF-8?q?=EB=8D=B0=EC=9D=B4=ED=8A=B8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Woojun Jung <46880056+jungnerd@users.noreply.github.com>
---
 docs/source/ko/quantization/compressed_tensors.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/ko/quantization/compressed_tensors.md b/docs/source/ko/quantization/compressed_tensors.md
index c9c7c82a9c3f..61ac2cabcf11 100644
--- a/docs/source/ko/quantization/compressed_tensors.md
+++ b/docs/source/ko/quantization/compressed_tensors.md
@@ -65,7 +65,7 @@ print(f"{mem_params/2**30:.4f} GB")
 
 ## 모델 체크포인트[[model-checkpoint]]
 
-compressed-tensor 모델은 설정 항목을 통해 정의됩니다. 다음 예시는 [nm-testing/Meta-Llama-3.1-8B-Instruct-FP8-hf](https://huggingface.co/nm-testing/Meta-Llama-3.1-8B-Instruct-FP8-hf/blob/main/config.json) `config.json` 파일에서 가져온 것입니다.
+compressed-tensor 모델은 설정 항목을 통해 정의됩니다. 다음은 [nm-testing/Meta-Llama-3.1-8B-Instruct-FP8-hf](https://huggingface.co/nm-testing/Meta-Llama-3.1-8B-Instruct-FP8-hf/blob/main/config.json) `config.json` 파일에서 가져온 예시입니다.
 
 압축 전후의 유연한 표현을 위해 많은 항목이 존재하지만, 모델 불러오기와 추론에는 핵심 항목 몇 가지만 알아도 됩니다.
 

From 6b4a97df1abc52b319f38bd047cce71d05e94099 Mon Sep 17 00:00:00 2001
From: MaCAT <138701551+maximizemaxwell@users.noreply.github.com>
Date: Thu, 31 Jul 2025 21:45:44 +0900
Subject: [PATCH 13/15] =?UTF-8?q?compressed=5Ftensors.md=20=EC=97=85?=
 =?UTF-8?q?=EB=8D=B0=EC=9D=B4=ED=8A=B8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Woojun Jung <46880056+jungnerd@users.noreply.github.com>
---
 docs/source/ko/quantization/compressed_tensors.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/ko/quantization/compressed_tensors.md b/docs/source/ko/quantization/compressed_tensors.md
index 61ac2cabcf11..1efc6ba0acfe 100644
--- a/docs/source/ko/quantization/compressed_tensors.md
+++ b/docs/source/ko/quantization/compressed_tensors.md
@@ -93,7 +93,7 @@ compressed-tensor 모델은 설정 항목을 통해 정의됩니다. 다음은 [
 },
 ```
 
-구성 파일은 구성 그룹(`group_0`)에 대해 정적 per-tensor 전략으로 가중치와 활성화 함수 기반 값을 fp8로 양자화하도록 지정합니다. `ignore` 키에 명시된 것처럼 `lm_head` 모듈은 양자화되지 않습니다.
+구성 파일은 구성 그룹(`group_0`)에 대해 텐서별 정적 전략으로 가중치와 활성화 함수 기반 값을 fp8로 양자화하도록 지정합니다. `ignore` 키에 명시된 것처럼 `lm_head` 모듈은 양자화되지 않습니다.
 
 모델 가중치를 더 자세히 보려면, 모델 카드의 [safetensors 뷰어](https://huggingface.co/nm-testing/Meta-Llama-3.1-8B-Instruct-FP8-hf?show_file_info=model.safetensors.index.json)를 사용하여 모든 [nn.Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) 모듈의 양자화된 가중치, 입력 스케일, 가중치 스케일을 확인할 수 있습니다.
 

From dd73f9146a879f3bdaf34898dca3b17571a64a7a Mon Sep 17 00:00:00 2001
From: Max <naturale@hufs.ac.kr>
Date: Tue, 5 Aug 2025 13:10:50 +0900
Subject: [PATCH 14/15] fix: toctree from main

---
 docs/source/ko/_toctree.yml | 1202 +++++++++++++++++++++++------------
 1 file changed, 779 insertions(+), 423 deletions(-)

diff --git a/docs/source/ko/_toctree.yml b/docs/source/ko/_toctree.yml
index 64ce94cde40e..b1ebddf57986 100644
--- a/docs/source/ko/_toctree.yml
+++ b/docs/source/ko/_toctree.yml
@@ -1,36 +1,220 @@
 - sections:
   - local: index
     title: 🤗 Transformers
-  - local: quicktour
-    title: 둘러보기
   - local: installation
     title: 설치방법
+  - local: quicktour
+    title: 둘러보기
   title: 시작하기
-- sections:
-  - local: pipeline_tutorial
-    title: Pipeline으로 추론하기
-  - local: autoclass_tutorial
-    title: AutoClass로 사전 학습된 인스턴스 로드하기
-  - local: preprocessing
-    title: 데이터 전처리하기
-  - local: training
-    title: 사전 학습된 모델 미세 조정하기
-  - local: run_scripts
-    title: 스크립트로 학습하기
-  - local: accelerate
-    title: 🤗 Accelerate로 분산 학습 구성하기
+- isExpanded: false
+  sections:
+  - sections:
+    - local: in_translation
+      title: (번역중) Loading models
+    - local: custom_models
+      title: 사용자 정의 모델 공유하기
+    - local: how_to_hack_models
+      title: 모델 구성 요소 맞춤 설정하기
+    - local: model_sharing
+      title: 만든 모델 공유하기
+    - local: modular_transformers
+      title: transformers에서의 모듈성
+    - local: add_new_model
+      title: 🤗 Transformers에 새로운 모델을 추가하는 방법
+    - local: in_translation
+      title: (번역중) Documenting a model
+    - local: in_translation
+      title: (번역중) Customizing attention function
+    title: 모델
+  - sections:
+    - local: fast_tokenizers
+      title: 🤗 Tokenizers 라이브러리에서 토크나이저 사용하기
+    - local: in_translation
+      title: (번역중) Image processors
+    - local: in_translation
+      title: (번역중) Video processors
+    - local: in_translation
+      title: (번역중) Backbones
+    - local: in_translation
+      title: (번역중) Feature extractors
+    - local: in_translation
+      title: (번역중) Processors
+    - local: tokenizer_summary
+      title: 토크나이저 요약
+    - local: pad_truncation
+      title: 패딩과 잘라내기
+    title: 전처리기(Preprocessors)
+  title: Base classes
+- isExpanded: false
+  sections:
+  - sections:
+    - local: pipeline_tutorial
+      title: Pipeline으로 추론하기
+    - local: pipeline_gradio
+      title: 머신러닝 앱
+    - local: pipeline_webserver
+      title: 추론 웹 서버를 위한 파이프라인
+    - local: add_new_pipeline
+      title: 어떻게 🤗 Transformers에 파이프라인을 추가하나요?
+    title: 파이프라인 API
+  - sections:
+    - local: llm_tutorial
+      title: 대규모 언어 모델로 생성하기
+    - local: generation_strategies
+      title: 텍스트 생성 전략 사용자 정의
+    - local: in_translation
+      title: (번역중) Generation features
+    - local: tasks/prompting
+      title: 대규모 언어 모델 프롬프팅 가이드
+    - local: llm_optims
+      title: LLM 추론 최적화
+    - local: in_translation
+      title: (번역중) Caching
+    - local: in_translation
+      title: (번역중) KV cache strategies
+    - local: serving
+      title: 모델 서빙하기
+    - local: llm_tutorial_optimization
+      title: LLM을 최대한 활용하기
+    - local: perplexity
+      title: 고정 길이 모델의 펄플렉서티(Perplexity)
+    title: 거대 언어 모델(LLMs)
+  - sections:
+    - local: conversations
+      title: Transformers로 채팅하기
+    - local: chat_templating
+      title: 챗봇 템플릿 익히기
+    - local: in_translation
+      title: (번역중) Multimodal templates
+    - local: in_translation
+      title: (번역중) Template writing
+    - local: in_translation
+      title: (번역중) Tools and RAG
+    title: 모델을 사용해 대화하기
+  - sections:
+    - local: in_translation
+      title: (번역중) torch.compile
+    - local: perf_infer_gpu_one
+      title: 하나의 GPU를 활용한 추론
+    - local: perf_infer_gpu_multi
+      title: (번역중) Distributed inference
+    - local: perf_infer_cpu
+      title: CPU로 추론하기
+    title: 최적화(Optimization)
+  - local: in_translation
+    title: (번역중) Agents
+  - local: in_translation
+    title: (번역중) Tools
+  title: 추론(Inference)
+- isExpanded: false
+  sections:
+  - sections:
+    - local: trainer
+      title: 트레이너(Trainer)
+    - local: training
+      title: 사전 학습된 모델 미세 조정하기
+    - local: in_translation
+      title: (번역중) Optimizers
+    - local: hpo_train
+      title: Trainer API를 사용한 하이퍼파라미터 탐색
+    title: Trainer API
+  - sections:
+    - local: accelerator_selection
+      title: (번역중) Accelerator selection
+    - local: accelerate
+      title: 🤗 Accelerate로 분산 학습 구성하기
+    - local: fsdp
+      title: 완전 분할 데이터 병렬 처리
+    - local: deepspeed
+      title: DeepSpeed
+    - local: debugging
+      title: 디버깅
+    - local: perf_train_cpu_many
+      title: 다중 CPU에서 학습하기
+    - local: perf_train_gpu_many
+      title: 다중 GPU에서 학습 진행하기
+    title: 분산 학습(Distributed training)
+  - sections:
+    - local: perf_train_gpu_one
+      title: GPU
+    - local: perf_train_cpu
+      title: CPU에서 훈련
+    - local: perf_train_special
+      title: Apple 실리콘에서 PyTorch 학습
+    - local: in_translation
+      title: (번역중) Intel Gaudi
+    - local: perf_hardware
+      title: 훈련용 사용자 맞춤형 하드웨어
+    title: 하드웨어
   - local: peft
     title: 🤗 PEFT로 어댑터 로드 및 학습하기
-  - local: model_sharing
-    title: 만든 모델 공유하기
-  - local: llm_tutorial
-    title: 대규모 언어 모델로 생성하기
-  - local: conversations
-    title: Transformers로 채팅하기
-  title: 튜토리얼
-- sections:
-  - isExpanded: false
-    sections:
+  - local: model_memory_anatomy
+    title: 모델 학습 해부하기
+  title: 학습(Training)
+- isExpanded: false
+  sections:
+  - local: in_translation
+    title: (번역중) Overview
+  - local: in_translation
+    title: (번역중) Selecting a quantization method
+  - local: in_translation
+    title: (번역중) Quantization concepts
+  - local: in_translation
+    title: (번역중) AQLM
+  - local: in_translation
+    title: (번역중) AutoRound
+  - local: quantization/awq
+    title: AWQ
+  - local: in_translation
+    title: (번역중) BitNet
+  - local: quantization/bitsandbytes
+    title: bitsandbytes
+  - local: in_translation
+    title: (번역중) compressed-tensors
+  - local: quantization/eetq
+    title: EETQ
+  - local: in_translation
+    title: (번역중) FBGEMM
+  - local: in_translation
+    title: (번역중) Fine-grained FP8
+  - local: gguf
+    title: GGUF 파일들과의 상호 운용성
+  - local: quantization/gptq
+    title: GPTQ
+  - local: in_translation
+    title: (번역중) HIGGS
+  - local: in_translation
+    title: (번역중) HQQ
+  - local: in_translation
+    title: (번역중) Optimum
+  - local: quantization/quanto
+    title: Quanto
+  - local: quantization/quark
+    title: Quark
+  - local: in_translation
+    title: (번역중) torchao
+  - local: in_translation
+    title: (번역중) SpQR
+  - local: in_translation
+    title: (번역중) VPTQ
+  - local: in_translation
+    title: (번역중) Contribute
+  title: 양자화(Quantization)
+- isExpanded: false
+  sections:
+  - local: serialization
+    title: ONNX로 내보내기
+  - local: tflite
+    title: TFLite로 내보내기
+  - local: executorch
+    title: ExecuTorch
+  - local: torchscript
+    title: TorchScript로 내보내기
+  title: 배포환경에 내보내기
+- isExpanded: false
+  sections:
+  - sections:
+    - sections:
       - local: tasks/sequence_classification
         title: 텍스트 분류
       - local: tasks/token_classification
@@ -47,24 +231,22 @@
         title: 요약
       - local: tasks/multiple_choice
         title: 객관식 문제(Multiple Choice)
-    title: 자연어처리
-  - isExpanded: false
-    sections:
+      title: 자연어처리
+    - sections:
       - local: tasks/audio_classification
         title: 오디오 분류
       - local: tasks/asr
         title: 자동 음성 인식
-    title: 오디오
-  - isExpanded: false
-    sections:
+      title: 오디오
+    - sections:
       - local: tasks/image_classification
         title: 이미지 분류
       - local: tasks/semantic_segmentation
-        title: 의미적 분할(Semantic segmentation)
+        title: 이미지 세그멘테이션
       - local: tasks/video_classification
-        title: 영상 분류
+        title: 비디오 분류
       - local: tasks/object_detection
-        title: 객체 탐지
+        title: 객체 탐지(Object detection)
       - local: tasks/zero_shot_object_detection
         title: 제로샷(zero-shot) 객체 탐지
       - local: tasks/zero_shot_image_classification
@@ -81,9 +263,8 @@
         title: 키포인트 탐지
       - local: tasks/knowledge_distillation_for_image_classification
         title: 컴퓨터 비전(이미지 분류)를 위한 지식 증류(knowledge distillation)
-    title: 컴퓨터 비전
-  - isExpanded: false
-    sections:
+      title: 컴퓨터 비전
+    - sections:
       - local: tasks/image_captioning
         title: 이미지 캡셔닝
       - local: tasks/document_question_answering
@@ -92,186 +273,55 @@
         title: 시각적 질의응답 (Visual Question Answering)
       - local: in_translation
         title: (번역중) Text to speech
-    title: 멀티모달
-  - isExpanded: false
-    sections:
-    - local: generation_strategies
-      title: 텍스트 생성 전략 사용자 정의
-    - local: serving
-      title: 모델 서빙하기
-    title: 생성
-  - isExpanded: false
-    sections:
-    - local: tasks/idefics
-      title: IDEFICS를 이용한 이미지 작업
-    - local: tasks/prompting
-      title: 대규모 언어 모델 프롬프팅 가이드
-    title: 프롬프팅
-  title: 태스크 가이드
-- sections:
-  - local: fast_tokenizers
-    title: 🤗 Tokenizers 라이브러리에서 토크나이저 사용하기
-  - local: multilingual
-    title: 다국어 모델 추론하기
-  - local: create_a_model
-    title: 모델별 API 사용하기
-  - local: custom_models
-    title: 사용자 정의 모델 공유하기
-  - local: chat_templating
-    title: 챗봇 템플릿 익히기
-  - local: trainer
-    title: Trainer 사용하기
-  - local: sagemaker
-    title: Amazon SageMaker에서 학습 실행하기
-  - local: serialization
-    title: ONNX로 내보내기
-  - local: gpu_selection
-    title: GPU 선택하기
-  - local: tflite
-    title: TFLite로 내보내기
-  - local: torchscript
-    title: TorchScript로 내보내기
+      - local: tasks/idefics
+        title: IDEFICS를 이용한 이미지 작업
+      - local: in_translation
+        title: (번역중) Image-text-to-text
+      - local: in_translation
+        title: (번역중) Video-text-to-text
+      - local: in_translation
+        title: (번역중) Visual Document Retrieval
+      title: 멀티모달
+    title: 태스크 레시피
+  - local: run_scripts
+    title: 스크립트로 학습하기
+  - local: glossary
+    title: Glossary
+  - local: philosophy
+    title: 이념과 목표
   - local: in_translation
     title: (번역중) Notebooks with examples
   - local: community
     title: 커뮤니티 리소스
   - local: troubleshooting
     title: 문제 해결
-  - local: gguf
-    title: GGUF 파일들과의 상호 운용성
-  - local: modular_transformers
-    title: transformers에서의 모듈성
-  title: (번역중) 개발자 가이드
-- sections:
-  - local: in_translation
-    title: (번역중) Getting started
-  - local: quantization/bitsandbytes
-    title: bitsandbytes
-  - local: quantization/compressed_tensors
-    title: compressed-tensors
-  - local: quantization/gptq
-    title: GPTQ
-  - local: quantization/awq
-    title: AWQ
-  - local: in_translation
-    title: (번역중) AQLM
-  - local: in_translation
-    title: (번역중) VPTQ
-  - local: quantization/quanto
-    title: Quanto
-  - local: quantization/quark
-    title: Quark
-  - local: quantization/eetq
-    title: EETQ
-  - local: in_translation
-    title: (번역중) HQQ
-  - local: in_translation
-    title: (번역중) Optimum
-  - local: in_translation
-    title: (번역중) Contribute new quantization method
-  title: (번역중) 경량화 메소드
-- sections:
-  - local: performance
-    title: 성능 및 확장성
-  - local: in_translation
-    title: (번역중) Quantization
-  - local: llm_optims
-    title: LLM 추론 최적화
-  - sections:
-    - local: in_translation
-      title: (번역중) Methods and tools for efficient training on a single GPU
-    - local: perf_train_gpu_many
-      title: 다중 GPU에서 훈련 진행하기
-    - local: deepspeed
-      title: DeepSpeed
-    - local: fsdp
-      title: 완전 분할 데이터 병렬 처리
-    - local: perf_train_cpu
-      title: CPU에서 훈련
-    - local: perf_train_cpu_many
-      title: 다중 CPU에서 훈련하기
-    - local: perf_train_tpu_tf
-      title: TensorFlow로 TPU에서 훈련하기
-    - local: perf_train_special
-      title: Apple 실리콘에서 PyTorch 학습
-    - local: perf_hardware
-      title: 훈련용 사용자 맞춤형 하드웨어
-    - local: hpo_train
-      title: Trainer API를 사용한 하이퍼파라미터 탐색
-    title: (번역중) 효율적인 학습 기술들
-  - sections:
-    - local: perf_infer_cpu
-      title: CPU로 추론하기
-    - local: perf_infer_gpu_one
-      title: 하나의 GPU를 활용한 추론
-    - local: perf_infer_gpu_multi
-      title: 다중 GPU를 활용한 추론
-    title: 추론 최적화하기
-  - local: big_models
-    title: 대형 모델을 인스턴스화
-  - local: debugging
-    title: 디버깅
-  - local: tf_xla
-    title: TensorFlow 모델을 위한 XLA 통합
-  - local: in_translation
-    title: (번역중) Optimize inference using `torch.compile()`
-  title: (번역중) 성능 및 확장성
-- sections:
-    - local: contributing
-      title: 🤗 Transformers에 기여하는 방법
-    - local: add_new_model
-      title: 🤗 Transformers에 새로운 모델을 추가하는 방법
-    - local: add_new_pipeline
-      title: 어떻게 🤗 Transformers에 파이프라인을 추가하나요?
-    - local: testing
-      title: 테스트
-    - local: pr_checks
-      title: Pull Request에 대한 검사
+  title: 리소스
+- isExpanded: false
+  sections:
+  - local: contributing
+    title: 🤗 Transformers에 기여하는 방법
+  - local: testing
+    title: Transformers 모델 테스트하기
+  - local: pr_checks
+    title: Pull request 검사하기
   title: 기여하기
-- sections:
-  - local: philosophy
-    title: 이념과 목표
-  - local: glossary
-    title: (번역중) Glossary
-  - local: task_summary
-    title: 🤗 Transformers로 할 수 있는 작업
-  - local: tasks_explained
-    title: 🤗 Transformers로 작업을 해결하는 방법
-  - local: model_summary
-    title: Transformer 모델군
-  - local: tokenizer_summary
-    title: 토크나이저 요약
-  - local: attention
-    title: 어텐션 매커니즘
-  - local: pad_truncation
-    title: 패딩과 잘라내기
-  - local: bertology
-    title: BERTology
-  - local: perplexity
-    title: 고정 길이 모델의 펄플렉서티(Perplexity)
-  - local: pipeline_webserver
-    title: 추론 웹 서버를 위한 파이프라인
-  - local: model_memory_anatomy
-    title: 모델 학습 해부하기
-  - local: llm_tutorial_optimization
-    title: LLM을 최대한 활용하기
-  title: (번역중) 개념 가이드
-- sections:
+- isExpanded: false
+  sections:
   - sections:
     - local: model_doc/auto
-      title: 자동 클래스
+      title: Auto Classes
     - local: in_translation
       title: (번역중) Backbones
     - local: main_classes/callback
-      title: 콜백
+      title: Callbacks
     - local: main_classes/configuration
-      title: 구성
+      title: Configuration
     - local: main_classes/data_collator
-      title: 데이터 콜레이터
+      title: Data Collator
     - local: main_classes/keras_callbacks
-      title: 케라스 콜백
+      title: Keras callbacks
     - local: main_classes/logging
-      title: 로깅
+      title: Logging
     - local: main_classes/model
       title: Models
     - local: main_classes/text_generation
@@ -280,32 +330,39 @@
       title: ONNX
     - local: in_translation
       title: (번역중) Optimization
-    - local: in_translation
-      title: 모델 출력
     - local: main_classes/output
-      title: (번역중) Pipelines
+      title: 모델 출력
+    - local: main_classes/peft
+      title: PEFT
     - local: in_translation
-      title: (번역중) Processors
+      title: (번역중) Pipelines
+    - local: main_classes/tokenizer
+      title: 토크나이저
     - local: main_classes/quantization
       title: 양자화
     - local: in_translation
       title: (번역중) Tokenizer
     - local: main_classes/trainer
       title: Trainer
-    - local: deepspeed
-      title: DeepSpeed
-    - local: main_classes/executorch
+    - local: in_translation
+      title: (번역중) DeepSpeed
+    - local: in_translation
       title: ExecuTorch
     - local: main_classes/feature_extractor
-      title: 특성 추출기
+      title: 피쳐 추출기
     - local: in_translation
       title: (번역중) Image Processor
-    title: (번역중) 메인 클래스
+    - local: in_translation
+      title: (번역중) Video Processor
+    title: 메인 클래스
   - sections:
-    - isExpanded: false
-      sections:
+    - sections:
+      - local: model_doc/albert
+        title: ALBERT
+      - local: in_translation
+        title: Arcee
       - local: in_translation
-        title: (번역중) ALBERT
+        title: Bamba
       - local: model_doc/bart
         title: BART
       - local: model_doc/barthez
@@ -315,43 +372,49 @@
       - local: model_doc/bert
         title: BERT
       - local: in_translation
-        title: (번역중) BertGeneration
+        title: BertGeneration
       - local: model_doc/bert-japanese
-        title: 일본어 Bert
+        title: BertJapanese
       - local: model_doc/bertweet
-        title: Bertweet
+        title: BERTweet
       - local: in_translation
-        title: (번역중) BigBird
+        title: BigBird
       - local: in_translation
-        title: (번역중) BigBirdPegasus
+        title: BigBirdPegasus
       - local: model_doc/biogpt
         title: BioGpt
       - local: in_translation
-        title: (번역중) Blenderbot
+        title: BitNet
       - local: in_translation
-        title: (번역중) Blenderbot Small
+        title: Blenderbot
       - local: in_translation
-        title: (번역중) BLOOM
+        title: Blenderbot Small
       - local: in_translation
-        title: (번역중) BORT
+        title: BLOOM
       - local: in_translation
-        title: (번역중) ByT5
+        title: BORT
       - local: in_translation
-        title: (번역중) CamemBERT
+        title: ByT5
       - local: in_translation
-        title: (번역중) CANINE
+        title: CamemBERT
+      - local: in_translation
+        title: CANINE
       - local: model_doc/codegen
         title: CodeGen
+      - local: in_translation
+        title: CodeLlama
       - local: model_doc/cohere
         title: Cohere
+      - local: in_translation
+        title: Cohere2
       - local: model_doc/convbert
         title: ConvBERT
       - local: in_translation
-        title: (번역중) CPM
+        title: CPM
       - local: in_translation
-        title: (번역중) CPMANT
+        title: CPMANT
       - local: in_translation
-        title: (번역중) CTRL
+        title: CTRL
       - local: model_doc/dbrx
         title: DBRX
       - local: model_doc/deberta
@@ -359,77 +422,121 @@
       - local: model_doc/deberta-v2
         title: DeBERTa-v2
       - local: in_translation
-        title: (번역중) DialoGPT
+        title: DeepSeek-V3
+      - local: in_translation
+        title: DialoGPT
+      - local: in_translation
+        title: DiffLlama
+      - local: in_translation
+        title: DistilBERT
       - local: in_translation
-        title: (번역중) DistilBERT
+        title: Doge
       - local: in_translation
-        title: (번역중) DPR
+        title: dots1
+      - local: in_translation
+        title: DPR
       - local: model_doc/electra
         title: ELECTRA
       - local: model_doc/encoder-decoder
-        title: 인코더 디코더 모델
+        title: Encoder Decoder Models
       - local: in_translation
-        title: (번역중) ERNIE
+        title: ERNIE
       - local: in_translation
-        title: (번역중) ErnieM
+        title: ErnieM
       - local: model_doc/esm
         title: ESM
+      - local: model_doc/exaone4
+        title: EXAONE-4.0
+      - local: in_translation
+        title: Falcon
+      - local: in_translation
+        title: Falcon3
+      - local: in_translation
+        title: FalconH1
+      - local: in_translation
+        title: FalconMamba
+      - local: in_translation
+        title: FLAN-T5
       - local: in_translation
-        title: (번역중) FLAN-T5
+        title: FLAN-UL2
       - local: in_translation
-        title: (번역중) FLAN-UL2
+        title: FlauBERT
       - local: in_translation
-        title: (번역중) FlauBERT
+        title: FNet
       - local: in_translation
-        title: (번역중) FNet
+        title: FSMT
       - local: in_translation
-        title: (번역중) FSMT
+        title: Funnel Transformer
       - local: in_translation
-        title: (번역중) Funnel Transformer
+        title: Fuyu
       - local: model_doc/gemma
         title: Gemma
       - local: model_doc/gemma2
         title: Gemma2
+      - local: in_translation
+        title: GLM
+      - local: in_translation
+        title: glm4
       - local: model_doc/openai-gpt
         title: GPT
       - local: in_translation
-        title: (번역중) GPT Neo
+        title: GPT Neo
       - local: in_translation
-        title: (번역중) GPT NeoX
+        title: GPT NeoX
       - local: model_doc/gpt_neox_japanese
         title: GPT NeoX Japanese
       - local: in_translation
-        title: (번역중) GPT-J
+        title: GPT-J
+      - local: in_translation
+        title: GPT2
+      - local: in_translation
+        title: GPTBigCode
+      - local: in_translation
+        title: GPTSAN Japanese
+      - local: in_translation
+        title: GPTSw3
+      - local: in_translation
+        title: Granite
+      - local: in_translation
+        title: GraniteMoe
+      - local: in_translation
+        title: GraniteMoeHybrid
+      - local: in_translation
+        title: GraniteMoeShared
+      - local: in_translation
+        title: Helium
       - local: in_translation
-        title: (번역중) GPT2
+        title: HerBERT
       - local: in_translation
-        title: (번역중) GPTBigCode
+        title: HGNet-V2
       - local: in_translation
-        title: (번역중) GPTSAN Japanese
+        title: I-BERT
       - local: in_translation
-        title: (번역중) GPTSw3
+        title: Jamba
       - local: in_translation
-        title: (번역중) HerBERT
+        title: JetMoe
       - local: in_translation
-        title: (번역중) I-BERT
+        title: Jukebox
       - local: in_translation
-        title: (번역중) Jukebox
+        title: LED
       - local: in_translation
-        title: (번역중) LED
+        title: LFM2
       - local: model_doc/llama
         title: LLaMA
       - local: model_doc/llama2
-        title: LLaMA2
+        title: Llama2
       - local: model_doc/llama3
-        title: LLaMA3
+        title: Llama3
       - local: in_translation
-        title: (번역중) Longformer
+        title: Longformer
       - local: in_translation
-        title: (번역중) LongT5
+        title: LongT5
       - local: in_translation
-        title: (번역중) LUKE
+        title: LUKE
       - local: in_translation
-        title: (번역중) M2M100
+        title: M2M100
+      - local: in_translation
+        title: MADLAD-400
       - local: model_doc/mamba
         title: Mamba
       - local: model_doc/mamba2
@@ -437,164 +544,265 @@
       - local: model_doc/marian
         title: MarianMT
       - local: in_translation
-        title: (번역중) MarkupLM
+        title: MarkupLM
+      - local: in_translation
+        title: MBart and MBart-50
       - local: in_translation
-        title: (번역중) MBart and MBart-50
+        title: MEGA
       - local: in_translation
-        title: (번역중) MEGA
+        title: MegatronBERT
       - local: in_translation
-        title: (번역중) MegatronBERT
+        title: MegatronGPT2
       - local: in_translation
-        title: (번역중) MegatronGPT2
+        title: MiniMax
       - local: model_doc/mistral
         title: Mistral
       - local: in_translation
-        title: (번역중) mLUKE
+        title: Mixtral
+      - local: in_translation
+        title: mLUKE
+      - local: in_translation
+        title: MobileBERT
+      - local: in_translation
+        title: ModernBert
+      - local: in_translation
+        title: ModernBERTDecoder
+      - local: in_translation
+        title: MPNet
+      - local: in_translation
+        title: MPT
+      - local: in_translation
+        title: MRA
+      - local: in_translation
+        title: MT5
+      - local: in_translation
+        title: MVP
+      - local: in_translation
+        title: myt5
       - local: in_translation
-        title: (번역중) MobileBERT
+        title: Nemotron
       - local: in_translation
-        title: (번역중) MPNet
+        title: NEZHA
       - local: in_translation
-        title: (번역중) MT5
+        title: NLLB
       - local: in_translation
-        title: (번역중) MVP
+        title: NLLB-MoE
       - local: in_translation
-        title: (번역중) NEZHA
+        title: Nyströmformer
       - local: in_translation
-        title: (번역중) NLLB
+        title: OLMo
       - local: in_translation
-        title: (번역중) NLLB-MoE
+        title: OLMo2
       - local: in_translation
-        title: (번역중) Nyströmformer
+        title: OLMoE
       - local: in_translation
-        title: (번역중) Open-Llama
+        title: Open-Llama
       - local: in_translation
-        title: (번역중) OPT
+        title: OPT
       - local: in_translation
-        title: (번역중) Pegasus
+        title: Pegasus
       - local: in_translation
-        title: (번역중) PEGASUS-X
+        title: PEGASUS-X
       - local: in_translation
-        title: (번역중) PhoBERT
+        title: Persimmon
       - local: in_translation
-        title: (번역중) PLBart
+        title: Phi
       - local: in_translation
-        title: (번역중) ProphetNet
+        title: Phi-3
       - local: in_translation
-        title: (번역중) QDQBert
+        title: PhiMoE
+      - local: in_translation
+        title: PhoBERT
+      - local: in_translation
+        title: PLBart
+      - local: in_translation
+        title: ProphetNet
+      - local: in_translation
+        title: QDQBert
+      - local: in_translation
+        title: Qwen2
+      - local: in_translation
+        title: Qwen2MoE
+      - local: in_translation
+        title: Qwen3
+      - local: in_translation
+        title: Qwen3MoE
       - local: model_doc/rag
-        title: RAG(검색 증강 생성)
+        title: RAG
+      - local: in_translation
+        title: REALM
       - local: in_translation
-        title: (번역중) REALM
+        title: RecurrentGemma
       - local: in_translation
-        title: (번역중) Reformer
+        title: Reformer
       - local: in_translation
-        title: (번역중) RemBERT
+        title: RemBERT
       - local: in_translation
-        title: (번역중) RetriBERT
+        title: RetriBERT
       - local: model_doc/roberta
         title: RoBERTa
       - local: in_translation
-        title: (번역중) RoBERTa-PreLayerNorm
+        title: RoBERTa-PreLayerNorm
+      - local: in_translation
+        title: RoCBert
+      - local: in_translation
+        title: RoFormer
+      - local: in_translation
+        title: RWKV
+      - local: in_translation
+        title: Splinter
+      - local: in_translation
+        title: SqueezeBERT
+      - local: in_translation
+        title: StableLm
+      - local: in_translation
+        title: Starcoder2
+      - local: in_translation
+        title: SwitchTransformers
+      - local: in_translation
+        title: T5
+      - local: in_translation
+        title: T5Gemma
+      - local: in_translation
+        title: T5v1.1
+      - local: in_translation
+        title: TAPEX
+      - local: in_translation
+        title: Transformer XL
+      - local: in_translation
+        title: UL2
+      - local: in_translation
+        title: UMT5
+      - local: in_translation
+        title: X-MOD
+      - local: in_translation
+        title: XGLM
+      - local: in_translation
+        title: XLM
+      - local: in_translation
+        title: XLM-ProphetNet
+      - local: in_translation
+        title: XLM-RoBERTa
+      - local: in_translation
+        title: XLM-RoBERTa-XL
+      - local: in_translation
+        title: XLM-V
+      - local: in_translation
+        title: XLNet
+      - local: in_translation
+        title: YOSO
       - local: in_translation
-        title: (번역중) RoCBert
+        title: Zamba
       - local: in_translation
-        title: (번역중) RoFormer
+        title: Zamba2
+      title: 텍스트 모델
+    - sections:
       - local: in_translation
-        title: (번역중) Splinter
+        title: Aimv2
       - local: in_translation
-        title: (번역중) SqueezeBERT
+        title: BEiT
       - local: in_translation
-        title: (번역중) SwitchTransformers
+        title: BiT
       - local: in_translation
-        title: (번역중) T5
+        title: Conditional DETR
       - local: in_translation
-        title: (번역중) T5v1.1
+        title: ConvNeXT
       - local: in_translation
-        title: (번역중) TAPEX
+        title: ConvNeXTV2
       - local: in_translation
-        title: (번역중) Transformer XL
+        title: CvT
       - local: in_translation
-        title: (번역중) UL2
+        title: D-FINE
       - local: in_translation
-        title: (번역중) X-MOD
+        title: DAB-DETR
       - local: in_translation
-        title: (번역중) XGLM
+        title: DeepSeek-V2
       - local: in_translation
-        title: (번역중) XLM
+        title: Deformable DETR
       - local: in_translation
-        title: (번역중) XLM-ProphetNet
+        title: DeiT
       - local: in_translation
-        title: (번역중) XLM-RoBERTa
+        title: Depth Anything
       - local: in_translation
-        title: (번역중) XLM-RoBERTa-XL
+        title: Depth Anything V2
       - local: in_translation
-        title: (번역중) XLM-V
+        title: DepthPro
       - local: in_translation
-        title: (번역중) XLNet
+        title: DETA
       - local: in_translation
-        title: (번역중) YOSO
-      title: (번역중) 텍스트 모델
-    - isExpanded: false
-      sections:
+        title: DETR
       - local: in_translation
-        title: (번역중) BEiT
+        title: DiNAT
       - local: in_translation
-        title: (번역중) BiT
+        title: DINOV2
       - local: in_translation
-        title: (번역중) Conditional DETR
+        title: DINOv2 with Registers
       - local: in_translation
-        title: (번역중) ConvNeXT
+        title: DiT
       - local: in_translation
-        title: (번역중) ConvNeXTV2
+        title: DPT
       - local: in_translation
-        title: (번역중) CvT
+        title: EfficientFormer
       - local: in_translation
-        title: (번역중) Deformable DETR
+        title: EfficientNet
       - local: in_translation
-        title: (번역중) DeiT
+        title: EoMT
       - local: in_translation
-        title: (번역중) DETA
+        title: FocalNet
       - local: in_translation
-        title: (번역중) DETR
+        title: GLPN
       - local: in_translation
-        title: (번역중) DiNAT
+        title: Hiera
       - local: in_translation
-        title: (번역중) DiT
+        title: I-JEPA
       - local: in_translation
-        title: (번역중) DPT
+        title: ImageGPT
       - local: in_translation
-        title: (번역중) EfficientFormer
+        title: LeViT
       - local: in_translation
-        title: (번역중) EfficientNet
+        title: LightGlue
       - local: in_translation
-        title: (번역중) FocalNet
+        title: Mask2Former
       - local: in_translation
-        title: (번역중) GLPN
+        title: MaskFormer
       - local: in_translation
-        title: (번역중) ImageGPT
+        title: MLCD
       - local: in_translation
-        title: (번역중) LeViT
+        title: MobileNetV1
       - local: in_translation
-        title: (번역중) Mask2Former
+        title: MobileNetV2
       - local: in_translation
-        title: (번역중) MaskFormer
+        title: MobileViT
       - local: in_translation
-        title: (번역중) MobileNetV1
+        title: MobileViTV2
       - local: in_translation
-        title: (번역중) MobileNetV2
+        title: NAT
       - local: in_translation
-        title: (번역중) MobileViT
+        title: PoolFormer
       - local: in_translation
-        title: (번역중) NAT
+        title: Prompt Depth Anything
       - local: in_translation
-        title: (번역중) PoolFormer
+        title: Pyramid Vision Transformer (PVT)
       - local: in_translation
-        title: (번역중) RegNet
+        title: Pyramid Vision Transformer v2 (PVTv2)
       - local: in_translation
-        title: (번역중) ResNet
+        title: RegNet
       - local: in_translation
-        title: (번역중) SegFormer
+        title: ResNet
+      - local: in_translation
+        title: RT-DETR
+      - local: in_translation
+        title: RT-DETRv2
+      - local: in_translation
+        title: SegFormer
+      - local: in_translation
+        title: SegGpt
+      - local: in_translation
+        title: SuperGlue
+      - local: in_translation
+        title: SuperPoint
+      - local: in_translation
+        title: SwiftFormer
       - local: model_doc/swin
         title: Swin Transformer
       - local: model_doc/swinv2
@@ -602,166 +810,305 @@
       - local: model_doc/swin2sr
         title: Swin2SR
       - local: in_translation
-        title: (번역중) Table Transformer
-      - local: in_translation
-        title: (번역중) TimeSformer
+        title: Table Transformer
       - local: in_translation
-        title: (번역중) UperNet
+        title: TextNet
       - local: in_translation
-        title: (번역중) VAN
+        title: Timm Wrapper
       - local: in_translation
-        title: (번역중) VideoMAE
+        title: UperNet
       - local: in_translation
-        title: Vision Transformer (ViT)
+        title: VAN
       - local: model_doc/vit
-        title: (번역중) ViT Hybrid
+        title: Vision Transformer (ViT)
+      - local: in_translation
+        title: ViT Hybrid
+      - local: in_translation
+        title: ViTDet
+      - local: in_translation
+        title: ViTMAE
+      - local: in_translation
+        title: ViTMatte
+      - local: in_translation
+        title: ViTMSN
+      - local: in_translation
+        title: ViTPose
+      - local: in_translation
+        title: YOLOS
+      - local: in_translation
+        title: ZoeDepth
+      title: 비전 모델
+    - sections:
+      - local: in_translation
+        title: Audio Spectrogram Transformer
+      - local: in_translation
+        title: Bark
+      - local: in_translation
+        title: CLAP
+      - local: in_translation
+        title: CSM
+      - local: in_translation
+        title: dac
       - local: in_translation
-        title: (번역중) ViTMAE
+        title: Dia
       - local: in_translation
-        title: (번역중) ViTMSN
+        title: EnCodec
       - local: in_translation
-        title: (번역중) YOLOS
-      title: (번역중) 비전 모델
-    - isExpanded: false
-      sections:
+        title: FastSpeech2Conformer
       - local: in_translation
-        title: (번역중) Audio Spectrogram Transformer
+        title: GraniteSpeech
       - local: in_translation
-        title: (번역중) CLAP
+        title: Hubert
       - local: in_translation
-        title: (번역중) Hubert
+        title: Kyutai Speech-To-Text
       - local: in_translation
-        title: (번역중) MCTCT
+        title: MCTCT
       - local: in_translation
-        title: (번역중) SEW
+        title: Mimi
       - local: in_translation
-        title: (번역중) SEW-D
+        title: MMS
       - local: in_translation
-        title: (번역중) Speech2Text
+        title: Moonshine
       - local: in_translation
-        title: (번역중) Speech2Text2
+        title: Moshi
       - local: in_translation
-        title: (번역중) SpeechT5
+        title: MusicGen
       - local: in_translation
-        title: (번역중) UniSpeech
+        title: MusicGen Melody
       - local: in_translation
-        title: (번역중) UniSpeech-SAT
+        title: Pop2Piano
       - local: in_translation
-        title: (번역중) Wav2Vec2
+        title: Seamless-M4T
       - local: in_translation
-        title: (번역중) Wav2Vec2-Conformer
+        title: SeamlessM4T-v2
       - local: in_translation
-        title: (번역중) Wav2Vec2Phoneme
+        title: SEW
       - local: in_translation
-        title: (번역중) WavLM
+        title: SEW-D
+      - local: in_translation
+        title: Speech2Text
+      - local: in_translation
+        title: Speech2Text2
+      - local: in_translation
+        title: SpeechT5
+      - local: in_translation
+        title: UniSpeech
+      - local: in_translation
+        title: UniSpeech-SAT
+      - local: in_translation
+        title: UnivNet
+      - local: in_translation
+        title: VITS
+      - local: in_translation
+        title: Wav2Vec2
+      - local: in_translation
+        title: Wav2Vec2-BERT
+      - local: in_translation
+        title: Wav2Vec2-Conformer
+      - local: in_translation
+        title: Wav2Vec2Phoneme
+      - local: in_translation
+        title: WavLM
       - local: model_doc/whisper
         title: Whisper
       - local: in_translation
-        title: (번역중) XLS-R
+        title: XLS-R
       - local: in_translation
-        title: (번역중) XLSR-Wav2Vec2
-      title: (번역중) 오디오 모델
-    - isExpanded: false
-      sections:
+        title: XLSR-Wav2Vec2
+      title: 오디오 모델
+    - sections:
       - local: model_doc/timesformer
         title: TimeSformer
       - local: in_translation
-        title: (번역중) VideoMAE
+        title: V-JEPA 2
+      - local: in_translation
+        title: VideoMAE
       - local: model_doc/vivit
         title: ViViT
-      title: (번역중) 비디오 모델
-    - isExpanded: false
-      sections:
+      title: 비디오 모델
+    - sections:
       - local: in_translation
-        title: (번역중) ALIGN
+        title: ALIGN
       - local: model_doc/altclip
         title: AltCLIP
-      - local: model_doc/blip-2
-        title: BLIP-2
+      - local: in_translation
+        title: Aria
+      - local: in_translation
+        title: AyaVision
       - local: model_doc/blip
         title: BLIP
+      - local: model_doc/blip-2
+        title: BLIP-2
+      - local: in_translation
+        title: BridgeTower
       - local: in_translation
-        title: (번역중) BridgeTower
+        title: BROS
       - local: model_doc/chameleon
         title: Chameleon
       - local: in_translation
-        title: (번역중) Chinese-CLIP
+        title: Chinese-CLIP
       - local: model_doc/clip
         title: CLIP
       - local: in_translation
-        title: (번역중) CLIPSeg
+        title: CLIPSeg
+      - local: in_translation
+        title: CLVP
+      - local: in_translation
+        title: ColPali
+      - local: in_translation
+        title: ColQwen2
       - local: in_translation
-        title: (번역중) Data2Vec
+        title: Data2Vec
       - local: in_translation
-        title: (번역중) DePlot
+        title: DePlot
       - local: in_translation
-        title: (번역중) Donut
+        title: Donut
       - local: in_translation
-        title: (번역중) FLAVA
+        title: Emu3
       - local: in_translation
-        title: (번역중) GIT
+        title: FLAVA
       - local: in_translation
-        title: (번역중) GroupViT
+        title: Gemma3
       - local: in_translation
-        title: (번역중) LayoutLM
+        title: Gemma3n
       - local: in_translation
-        title: (번역중) LayoutLMV2
+        title: GIT
       - local: in_translation
-        title: (번역중) LayoutLMV3
+        title: glm4v
       - local: in_translation
-        title: (번역중) LayoutXLM
+        title: GOT-OCR2
       - local: in_translation
-        title: (번역중) LiLT
+        title: GraniteVision
       - local: in_translation
-        title: (번역중) LXMERT
+        title: Grounding DINO
       - local: in_translation
-        title: (번역중) MatCha
+        title: GroupViT
       - local: in_translation
-        title: (번역중) MGP-STR
+        title: IDEFICS
       - local: in_translation
-        title: (번역중) OneFormer
+        title: Idefics2
       - local: in_translation
-        title: (번역중) OWL-ViT
+        title: Idefics3
+      - local: in_translation
+        title: InstructBLIP
+      - local: in_translation
+        title: InstructBlipVideo
+      - local: in_translation
+        title: InternVL
+      - local: in_translation
+        title: Janus
+      - local: in_translation
+        title: KOSMOS-2
+      - local: in_translation
+        title: LayoutLM
+      - local: in_translation
+        title: LayoutLMV2
+      - local: in_translation
+        title: LayoutLMV3
+      - local: in_translation
+        title: LayoutXLM
+      - local: in_translation
+        title: LiLT
+      - local: in_translation
+        title: Llama4
+      - local: in_translation
+        title: Llava
+      - local: in_translation
+        title: LLaVA-NeXT
+      - local: in_translation
+        title: LLaVa-NeXT-Video
+      - local: in_translation
+        title: LLaVA-Onevision
+      - local: in_translation
+        title: LXMERT
+      - local: in_translation
+        title: MatCha
+      - local: in_translation
+        title: MGP-STR
+      - local: in_translation
+        title: Mistral3
+      - local: in_translation
+        title: mllama
+      - local: in_translation
+        title: Nougat
+      - local: in_translation
+        title: OmDet-Turbo
+      - local: in_translation
+        title: OneFormer
+      - local: in_translation
+        title: OWL-ViT
+      - local: in_translation
+        title: OWLv2
       - local: model_doc/paligemma
         title: PaliGemma
       - local: in_translation
-        title: (번역중) Perceiver
+        title: Perceiver
+      - local: in_translation
+        title: PerceptionLM
+      - local: in_translation
+        title: Phi4 Multimodal
+      - local: in_translation
+        title: Pix2Struct
+      - local: in_translation
+        title: Pixtral
       - local: in_translation
-        title: (번역중) Pix2Struct
+        title: Qwen2.5-Omni
+      - local: in_translation
+        title: Qwen2.5-VL
+      - local: in_translation
+        title: Qwen2Audio
       - local: model_doc/qwen2_vl
         title: Qwen2VL
       - local: in_translation
-        title: (번역중) Segment Anything
+        title: Segment Anything
+      - local: in_translation
+        title: Segment Anything High Quality
+      - local: in_translation
+        title: ShieldGemma2
       - local: model_doc/siglip
         title: SigLIP
       - local: in_translation
-        title: (번역중) Speech Encoder Decoder Models
+        title: SigLIP2
+      - local: in_translation
+        title: SmolLM3
       - local: in_translation
-        title: (번역중) TAPAS
+        title: SmolVLM
       - local: in_translation
-        title: (번역중) TrOCR
+        title: Speech Encoder Decoder Models
       - local: in_translation
-        title: (번역중) TVLT
+        title: TAPAS
       - local: in_translation
-        title: (번역중) ViLT
+        title: TrOCR
       - local: in_translation
-        title: (번역중) Vision Encoder Decoder Models
+        title: TVLT
+      - local: model_doc/tvp
+        title: TVP
       - local: in_translation
-        title: (번역중) Vision Text Dual Encoder
+        title: UDOP
       - local: in_translation
-        title: (번역중) VisualBERT
+        title: VideoLlava
       - local: in_translation
-        title: (번역중) X-CLIP
-      title: (번역중) 멀티모달 모델
-    - isExpanded: false
-      sections:
+        title: ViLT
       - local: in_translation
-        title: (번역중) Decision Transformer
+        title: VipLlava
+      - local: in_translation
+        title: Vision Encoder Decoder Models
+      - local: in_translation
+        title: Vision Text Dual Encoder
+      - local: in_translation
+        title: VisualBERT
+      - local: in_translation
+        title: Voxtral
+      - local: in_translation
+        title: X-CLIP
+      title: 멀티모달 모델
+    - sections:
+      - local: in_translation
+        title: Decision Transformer
       - local: model_doc/trajectory_transformer
-        title: 궤적 트랜스포머
-      title: (번역중) 강화학습 모델
-    - isExpanded: false
-      sections:
+        title: Trajectory Transformer
+      title: 강화학습 모델
+    - sections:
       - local: model_doc/autoformer
         title: Autoformer
       - local: model_doc/informer
@@ -771,17 +1118,20 @@
       - local: model_doc/patchtst
         title: PatchTST
       - local: model_doc/time_series_transformer
-        title: 시계열 트랜스포머
-      title: 시계열 모델
-    - isExpanded: false
-      sections:
+        title: Time Series Transformer
+      - local: in_translation
+        title: TimesFM
+      title: 시계열 모델
+    - sections:
       - local: model_doc/graphormer
         title: Graphormer
       title: 그래프 모델
-    title: (번역중) 모델
+    title: 모델
   - sections:
     - local: internal/modeling_utils
       title: 사용자 정의 레이어 및 유틸리티
+    - local: in_translation
+      title: (번역중) Utilities for Model Debugging
     - local: internal/pipelines_utils
       title: 파이프라인을 위한 유틸리티
     - local: internal/tokenization_utils
@@ -796,7 +1146,13 @@
       title: 오디오 처리를 위한 유틸리티
     - local: internal/file_utils
       title: 일반 유틸리티
+    - local: in_translation
+      title: (번역중) Importing Utilities
     - local: internal/time_series_utils
       title: 시계열을 위한 유틸리티
-    title: (번역중) Internal Helpers
-  title: (번역중) API
+    title: 내부 헬퍼(Internal helpers)
+  - sections:
+    - local: in_translation
+      title: (번역중) Environment Variables
+    title: Reference
+  title: API

From 5d3a7122ccb940e26426bbf92da4a3c7e69de357 Mon Sep 17 00:00:00 2001
From: Max <naturale@hufs.ac.kr>
Date: Tue, 5 Aug 2025 13:12:00 +0900
Subject: [PATCH 15/15] fix: update toctree

---
 docs/source/ko/_toctree.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/source/ko/_toctree.yml b/docs/source/ko/_toctree.yml
index b1ebddf57986..ae9c18d8dbcf 100644
--- a/docs/source/ko/_toctree.yml
+++ b/docs/source/ko/_toctree.yml
@@ -169,8 +169,8 @@
     title: (번역중) BitNet
   - local: quantization/bitsandbytes
     title: bitsandbytes
-  - local: in_translation
-    title: (번역중) compressed-tensors
+  - local: quantization/compressed_tensors
+    title: compressed-tensors
   - local: quantization/eetq
     title: EETQ
   - local: in_translation