From 471943bfd785b7e9d6390596f08e62ec65c0ace7 Mon Sep 17 00:00:00 2001 From: sixgod Date: Fri, 1 Nov 2024 10:21:56 +0000 Subject: [PATCH] support INT4 inference --- basic_demo/trans_cli_vision_demo.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/basic_demo/trans_cli_vision_demo.py b/basic_demo/trans_cli_vision_demo.py index 758ccc6..adca35d 100644 --- a/basic_demo/trans_cli_vision_demo.py +++ b/basic_demo/trans_cli_vision_demo.py @@ -17,7 +17,7 @@ from transformers import ( AutoTokenizer, StoppingCriteria, StoppingCriteriaList, - TextIteratorStreamer, AutoModel + TextIteratorStreamer, AutoModel, BitsAndBytesConfig ) from PIL import Image @@ -29,6 +29,8 @@ tokenizer = AutoTokenizer.from_pretrained( trust_remote_code=True, encode_special_tokens=True ) + +## For BF16 inference model = AutoModel.from_pretrained( MODEL_PATH, trust_remote_code=True, @@ -37,7 +39,6 @@ model = AutoModel.from_pretrained( device_map="auto", ).eval() - ## For INT4 inference # model = AutoModel.from_pretrained( # MODEL_PATH,