support INT4 inference
This commit is contained in:
parent
24c8e1b317
commit
471943bfd7
|
@ -17,7 +17,7 @@ from transformers import (
|
||||||
AutoTokenizer,
|
AutoTokenizer,
|
||||||
StoppingCriteria,
|
StoppingCriteria,
|
||||||
StoppingCriteriaList,
|
StoppingCriteriaList,
|
||||||
TextIteratorStreamer, AutoModel
|
TextIteratorStreamer, AutoModel, BitsAndBytesConfig
|
||||||
)
|
)
|
||||||
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
@ -29,6 +29,8 @@ tokenizer = AutoTokenizer.from_pretrained(
|
||||||
trust_remote_code=True,
|
trust_remote_code=True,
|
||||||
encode_special_tokens=True
|
encode_special_tokens=True
|
||||||
)
|
)
|
||||||
|
|
||||||
|
## For BF16 inference
|
||||||
model = AutoModel.from_pretrained(
|
model = AutoModel.from_pretrained(
|
||||||
MODEL_PATH,
|
MODEL_PATH,
|
||||||
trust_remote_code=True,
|
trust_remote_code=True,
|
||||||
|
@ -37,7 +39,6 @@ model = AutoModel.from_pretrained(
|
||||||
device_map="auto",
|
device_map="auto",
|
||||||
).eval()
|
).eval()
|
||||||
|
|
||||||
|
|
||||||
## For INT4 inference
|
## For INT4 inference
|
||||||
# model = AutoModel.from_pretrained(
|
# model = AutoModel.from_pretrained(
|
||||||
# MODEL_PATH,
|
# MODEL_PATH,
|
||||||
|
|
Loading…
Reference in New Issue