Mirror of https://github.com/microsoft/BitNet.git (synced 2026-05-03 11:20:36 +00:00)
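Fix weird character issue (added-vocabulary tokens are now round-tripped through `tokenizer.encode` / `tokenizer.decode` before being stored, so manually added `\n` / `\t` special characters are preserved; also registers `Falcon3ForCausalLM` with `LlamaModel`)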
This commit is contained in:
@@ -272,7 +272,10 @@ class Model(ABC):
|
||||
tokens.append(f"[PAD{i}]")
|
||||
toktypes.append(gguf.TokenType.USER_DEFINED)
|
||||
elif reverse_vocab[i] in added_vocab:
|
||||
tokens.append(reverse_vocab[i])
|
||||
# We need to manually encode and decode the added tokens in case special characters
|
||||
# used for `\n` / `\t` have been manually added in the added tokens
|
||||
encoded_decoded_token = tokenizer.decode(tokenizer.encode(reverse_vocab[i]))
|
||||
tokens.append(encoded_decoded_token)
|
||||
if tokenizer.added_tokens_decoder[i].special:
|
||||
toktypes.append(gguf.TokenType.CONTROL)
|
||||
else:
|
||||
@@ -280,7 +283,6 @@ class Model(ABC):
|
||||
else:
|
||||
tokens.append(reverse_vocab[i])
|
||||
toktypes.append(gguf.TokenType.NORMAL)
|
||||
|
||||
return tokens, toktypes, tokpre
|
||||
|
||||
# NOTE: this function is generated by convert-hf-to-gguf-update.py
|
||||
@@ -674,7 +676,7 @@ def read_model_config(model_dir: str) -> dict[str, Any]:
|
||||
with open(config, "r") as f:
|
||||
return json.load(f)
|
||||
|
||||
@Model.register("LLaMAForCausalLM", "LlamaForCausalLM", "MistralForCausalLM", "MixtralForCausalLM")
|
||||
@Model.register("LLaMAForCausalLM", "LlamaForCausalLM", "MistralForCausalLM", "MixtralForCausalLM", "Falcon3ForCausalLM")
|
||||
class LlamaModel(Model):
|
||||
model_arch = gguf.MODEL_ARCH.LLAMA
|
||||
|
||||
|
||||
Reference in New Issue
Block a user