@@ -30,7 +30,7 @@
 from ..utils import get_gpu_memory
 from ..weight_loader.model_weight_loader import load_model_weights
 from .cache_engine import CacheEngine
-from .guided_process import GuidedDecodingMangager
+from .guided_process import GuidedDecodingManager
 from .logits_process import FusedLogitsProcessor, SamplingInputs

 logger = get_logger('lmdeploy')
@@ -315,10 +315,6 @@ def __init__(self,
         self.cache_config = cache_config
         # use raw tokenizer
         self.tokenizer = Tokenizer(model_path).model.model
-        try:
-            self.sampling_vocab_size = len(self.tokenizer)
-        except BaseException:
-            self.sampling_vocab_size = None

         self._pre_in_que = None
         self._in_que = None
@@ -354,9 +350,9 @@ def __init__(self,
         self.cache_engine = None
         self.profiler: AgentProfiler = None
         try:
-            self.guided_decoding_manager = GuidedDecodingMangager(self.tokenizer, self.sampling_vocab_size)
+            self.guided_decoding_manager = GuidedDecodingManager(self.tokenizer, model_config.vocab_size)
         except ValueError as e:
-            logger.warning(f'Failed to create GuidedManager for tokenizer {self.tokenizer}: {e}')
+            logger.warning(f'Failed to create GuidedManager for tokenizer {type(self.tokenizer)}: {e}')
             self.guided_decoding_manager = None

         # microbatch
@@ -552,7 +548,6 @@ async def async_sampling_logits(self, logits: torch.Tensor, sampling_inputs: Sam
         with record_function('sampling_logits'):
             logits_processor = FusedLogitsProcessor(
                 sampling_inputs,
-                sampling_vocab_size=self.sampling_vocab_size,
                 logprobs_mode=self.misc_config.logprobs_mode,
                 guided_decoding_manager=self.guided_decoding_manager,
             )
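
The hunks above replace the tokenizer-derived `self.sampling_vocab_size` (previously probed with `len(self.tokenizer)` under a broad `except BaseException`) with `model_config.vocab_size`, while keeping guided decoding optional via the `try`/`except ValueError` guard around the manager's construction. Below is a minimal, self-contained sketch of that construct-or-disable pattern; the stub class, helper name, and vocab numbers are illustrative assumptions, not lmdeploy's actual `GuidedDecodingManager` implementation.

```python
# Illustrative stand-ins only: this GuidedDecodingManager stub and the helper
# below mimic the pattern in the diff; they are not lmdeploy's real classes.


class GuidedDecodingManager:
    """Stub manager that validates the vocab size it is handed."""

    def __init__(self, tokenizer, vocab_size):
        if not isinstance(vocab_size, int) or vocab_size <= 0:
            raise ValueError(f'invalid vocab_size: {vocab_size!r}')
        self.tokenizer = tokenizer
        self.vocab_size = vocab_size


def build_guided_decoding_manager(tokenizer, vocab_size):
    # Guided decoding stays optional: if the manager cannot be built,
    # disable it (return None) rather than failing agent construction.
    try:
        return GuidedDecodingManager(tokenizer, vocab_size)
    except ValueError as e:
        print(f'Failed to create GuidedManager for tokenizer {type(tokenizer)}: {e}')
        return None


# The model config's vocab size is now the single source of truth.
assert build_guided_decoding_manager(object(), 32000) is not None
assert build_guided_decoding_manager(object(), 0) is None
```

Removing the `sampling_vocab_size=` keyword from the `FusedLogitsProcessor` call in `async_sampling_logits` is the counterpart of the same cleanup.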