"""
|
|
This script is used to convert the original model to OpenVINO IR format.
|
|
The Origin Code can check https://github.com/OpenVINO-dev-contest/chatglm3.openvino/blob/main/convert.py
|
|
"""
from transformers import AutoTokenizer, AutoConfig
from optimum.intel import OVWeightQuantizationConfig
from optimum.intel.openvino import OVModelForCausalLM

import os
from pathlib import Path
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('-h',
                        '--help',
                        action='help',
                        help='Show this help message and exit.')
    parser.add_argument('-m',
                        '--model_id',
                        default='THUDM/glm-4-9b-chat',
                        required=False,
                        type=str,
                        help='original model path')
    parser.add_argument('-p',
                        '--precision',
                        required=False,
                        default="int4",
                        type=str,
                        choices=["fp16", "int8", "int4"],
                        help='fp16, int8 or int4')
    parser.add_argument('-o',
                        '--output',
                        default='./glm-4-9b-ov',
                        required=False,
                        type=str,
                        help='path to save the IR model')
    args = parser.parse_args()

    # Create the output directory for the IR model if it does not already exist.
    ir_model_path = Path(args.output)
    if not ir_model_path.exists():
        os.mkdir(ir_model_path)

    # Keyword arguments forwarded to optimum-intel when loading/exporting the model.
    model_kwargs = {
        "trust_remote_code": True,
        "config": AutoConfig.from_pretrained(args.model_id, trust_remote_code=True),
    }
    # INT4 weight-compression settings: asymmetric quantization, per-group
    # quantization with group size 128, and 80% of the weights compressed to
    # 4 bit (the remaining layers fall back to 8 bit).
    compression_configs = {
        "sym": False,
        "group_size": 128,
        "ratio": 0.8,
    }

    print("====Exporting IR=====")
    if args.precision == "int4":
        # INT4 weight-only quantization using the compression settings above.
        ov_model = OVModelForCausalLM.from_pretrained(
            args.model_id, export=True, compile=False,
            quantization_config=OVWeightQuantizationConfig(bits=4, **compression_configs),
            **model_kwargs)
    elif args.precision == "int8":
        # INT8 weight-only quantization.
        ov_model = OVModelForCausalLM.from_pretrained(
            args.model_id, export=True, compile=False, load_in_8bit=True, **model_kwargs)
    else:
        # Export without weight compression.
        ov_model = OVModelForCausalLM.from_pretrained(
            args.model_id, export=True, compile=False, load_in_8bit=False, **model_kwargs)

    ov_model.save_pretrained(ir_model_path)

    print("====Exporting tokenizer=====")
    tokenizer = AutoTokenizer.from_pretrained(
        args.model_id, trust_remote_code=True)
    tokenizer.save_pretrained(ir_model_path)
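
    # The exported IR directory can be reloaded later without re-converting,
    # e.g. (a minimal sketch using the same optimum-intel / transformers APIs):
    #   model = OVModelForCausalLM.from_pretrained(ir_model_path)
    #   tokenizer = AutoTokenizer.from_pretrained(ir_model_path, trust_remote_code=True)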