support INT4 inference

This commit is contained in:
sixgod 2024-11-01 10:21:56 +00:00
parent 24c8e1b317
commit 471943bfd7
1 changed file with 3 additions and 2 deletions

View File

@@ -17,7 +17,7 @@ from transformers import (
AutoTokenizer,
StoppingCriteria,
StoppingCriteriaList,
-TextIteratorStreamer, AutoModel
+TextIteratorStreamer, AutoModel, BitsAndBytesConfig
)
from PIL import Image
@@ -29,6 +29,8 @@ tokenizer = AutoTokenizer.from_pretrained(
trust_remote_code=True,
encode_special_tokens=True
)
## For BF16 inference
model = AutoModel.from_pretrained(
MODEL_PATH,
trust_remote_code=True,
@@ -37,7 +39,6 @@ model = AutoModel.from_pretrained(
device_map="auto",
).eval()
## For INT4 inference
# model = AutoModel.from_pretrained(
# MODEL_PATH,