support INT4 inference
This commit is contained in:
parent
24c8e1b317
commit
471943bfd7
|
@ -17,7 +17,7 @@ from transformers import (
|
|||
AutoTokenizer,
|
||||
StoppingCriteria,
|
||||
StoppingCriteriaList,
|
||||
TextIteratorStreamer, AutoModel
|
||||
TextIteratorStreamer, AutoModel, BitsAndBytesConfig
|
||||
)
|
||||
|
||||
from PIL import Image
|
||||
|
@ -29,6 +29,8 @@ tokenizer = AutoTokenizer.from_pretrained(
|
|||
trust_remote_code=True,
|
||||
encode_special_tokens=True
|
||||
)
|
||||
|
||||
## For BF16 inference
|
||||
model = AutoModel.from_pretrained(
|
||||
MODEL_PATH,
|
||||
trust_remote_code=True,
|
||||
|
@ -37,7 +39,6 @@ model = AutoModel.from_pretrained(
|
|||
device_map="auto",
|
||||
).eval()
|
||||
|
||||
|
||||
## For INT4 inference
|
||||
# model = AutoModel.from_pretrained(
|
||||
# MODEL_PATH,
|
||||
|
|
Loading…
Reference in New Issue