Fix hard crash when the SentencePiece (spiece) tokenizer path does not point to an existing file.
This commit is contained in:
parent
dc300a4569
commit
636d4bfb89
@ -1,4 +1,5 @@
|
|||||||
import torch
|
import torch
|
||||||
|
import os
|
||||||
|
|
||||||
class SPieceTokenizer:
|
class SPieceTokenizer:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -15,6 +16,8 @@ class SPieceTokenizer:
|
|||||||
if isinstance(tokenizer_path, bytes):
|
if isinstance(tokenizer_path, bytes):
|
||||||
self.tokenizer = sentencepiece.SentencePieceProcessor(model_proto=tokenizer_path, add_bos=self.add_bos, add_eos=self.add_eos)
|
self.tokenizer = sentencepiece.SentencePieceProcessor(model_proto=tokenizer_path, add_bos=self.add_bos, add_eos=self.add_eos)
|
||||||
else:
|
else:
|
||||||
|
if not os.path.isfile(tokenizer_path):
|
||||||
|
raise ValueError("invalid tokenizer")
|
||||||
self.tokenizer = sentencepiece.SentencePieceProcessor(model_file=tokenizer_path, add_bos=self.add_bos, add_eos=self.add_eos)
|
self.tokenizer = sentencepiece.SentencePieceProcessor(model_file=tokenizer_path, add_bos=self.add_bos, add_eos=self.add_eos)
|
||||||
|
|
||||||
def get_vocab(self):
|
def get_vocab(self):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user