support INT4 inference

This commit is contained in:
sixgod 2024-11-01 10:21:56 +00:00
parent 24c8e1b317
commit 471943bfd7
1 changed file with 3 additions and 2 deletions

@@ -17,7 +17,7 @@ from transformers import (
     AutoTokenizer,
     StoppingCriteria,
     StoppingCriteriaList,
-    TextIteratorStreamer, AutoModel
+    TextIteratorStreamer, AutoModel, BitsAndBytesConfig
 )
 from PIL import Image
@@ -29,6 +29,8 @@ tokenizer = AutoTokenizer.from_pretrained(
     trust_remote_code=True,
     encode_special_tokens=True
 )
+## For BF16 inference
 model = AutoModel.from_pretrained(
     MODEL_PATH,
     trust_remote_code=True,
@@ -37,7 +39,6 @@ model = AutoModel.from_pretrained(
     device_map="auto",
 ).eval()
 ## For INT4 inference
 # model = AutoModel.from_pretrained(
 #     MODEL_PATH,