
Commit 52c13fe

update ov multimedia models 062725
1 parent 7e03507 commit 52c13fe

2 files changed: +180 additions, -41 deletions


llmware/model_configs.py

Lines changed: 65 additions & 2 deletions
@@ -94,7 +94,20 @@
  "validation_files": ["openvino_model.xml"],
  "link": "https://huggingface.co/llmware/phi-3-ov"},

- {"model_name": "qwen2.5-1.5b-instruct-ov", "display_name": "qwen2.5-1.5b-instruct-ov",
+ # new text-to-image model - more coming soon
+ {"model_name": "lcm-dreamshaper-ov", "model_family": "OVGenerativeModel",
+ "model_category": "generative_local", "display_name": "lcm-dreamshaper-ov",
+ "model_location": "llmware_repo", "pipeline": "text2image",
+ "context_window": 4096, "instruction_following": False, "prompt_wrapper": "phi_3",
+ "temperature": 0.0, "sample_default": False, "trailing_space": "",
+ "tokenizer_local": "tokenizer_phi3.json",
+ "hf_repo": "llmware/lcm-dreamshaper-ov",
+ "custom_model_files": [], "custom_model_repo": "",
+ "fetch": {"snapshot": True, "module": "llmware.models", "method": "pull_snapshot_from_hf"},
+ "validation_files": [],
+ "link": "https://huggingface.co/llmware/lcm-dreamshaper-ov"},
+
+ {"model_name": "qwen2.5-1.5b-instruct-ov", "display_name": "qwen2.5-1.5b-instruct-ov",
  "model_family": "OVGenerativeModel", "model_category": "generative_local",
  "model_location": "llmware_repo", "context_window": 4096, "instruction_following": False,
  "prompt_wrapper": "hf_chat", "temperature": 0.3, "trailing_space": "",
@@ -2604,7 +2617,57 @@
  "validation_files": [],
  "custom_model_files": [], "custom_model_repo": ""},

- {"model_name": "llama-3.2-3b-instruct-ov", "display_name": "llama-3.2-3b-instruct-ov",
+ {"model_name": "slim-tags-npu-ov", "display_name": "agent-npu-tags",
+ "model_family": "OVGenerativeModel", "model_category": "generative_local",
+ "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False,
+ "prompt_wrapper": "human_bot", "temperature": 0.0, "sample_default": False,
+ "trailing_space": "", "gguf_file": "", "gguf_repo": "llmware/slim-tags-npu-ov",
+ "link": "https://huggingface.co/llmware/slim-tags-npu-ov",
+ "fetch": {"module": "llmware.models",
+ "method": "pull_snapshot_from_hf"},
+ "validation_files": [],
+ "custom_model_files": [], "custom_model_repo": "",
+ "output_type": "dict", "function_call": True,
+ "primary_keys": ["tags"],
+ "fc_output_values": [], "parameters": 1.1,
+ "tokenizer": "llmware/slim-tags",
+ "tokenizer_local": "tokenizer_tl.json",
+ "marker_tokens": [], "marker_token_lookup": {},
+ "function": ["classify"], "npu_optimized": True,
+ },
+
+ {"model_name": "slim-topics-npu-ov", "display_name": "agent-topics",
+ "model_family": "OVGenerativeModel", "model_category": "generative_local",
+ "model_location": "llmware_repo", "context_window": 2048,
+ "instruction_following": False, "prompt_wrapper": "human_bot",
+ "temperature": 0.0, "sample_default": False, "trailing_space": "",
+ "gguf_file": "", "gguf_repo": "llmware/slim-topics-npu-ov",
+ "hf_repo": "llmware/slim-topics-npu-ov", "parameters": 1.1,
+ "link": "https://huggingface.co/llmware/slim-topics-npu-ov",
+ "custom_model_files": [], "custom_model_repo": "", "output_type": "dict",
+ "function_call": True, "primary_keys": ["topics"], "fc_output_values": [],
+ "tokenizer": "llmware/slim-sentiment", "tokenizer_local": "tokenizer_tl.json",
+ "marker_tokens": [], "marker_token_lookup": {}, "function": ["classify"],
+ "snapshot": True, "npu_optimized": True,
+ "fetch": {"snapshot": True, "module": "llmware.models",
+ "method": "pull_snapshot_from_hf"},
+ "validation_files": [],
+ },
+
+ {"model_name": "llama-3.2-1b-instruct-npu-ov", "display_name": "llama-3.2-npu-1b",
+ "model_family": "OVGenerativeModel", "model_category": "generative_local",
+ "model_location": "llmware_repo", "context_window": 4096, "instruction_following": False,
+ "prompt_wrapper": "llama_3_chat", "temperature": 0.3, "trailing_space": "",
+ "hf_repo": "llmware/llama-3.2-1b-instruct-npu-ov",
+ "link": "https://huggingface.co/llmware/llama-3.2-1b-npu-instruct-ov",
+ "tokenizer_local": "tokenizer_ll3.json",
+ "fetch": {"module": "llmware.models", "method": "pull_snapshot_from_hf"},
+ "validation_files": [], "parameters": 1.1,
+ "custom_model_files": [], "custom_model_repo": "",
+ "npu_optimized": True,
+ "tags": ["llmware-chat", "p1", "ov", "green", "emerald"]},
+
+ {"model_name": "llama-3.2-3b-instruct-ov", "display_name": "llama-3.2-3b-instruct-ov",
  "model_family": "OVGenerativeModel", "model_category": "generative_local",
  "model_location": "llmware_repo", "context_window": 4096, "instruction_following": False,
  "prompt_wrapper": "llama_3_chat", "temperature": 0.3, "trailing_space": "",

llmware/models.py

Lines changed: 115 additions & 39 deletions
@@ -3868,7 +3868,8 @@ class OVGenerativeModel(BaseModel):
  def __init__(self, model=None, tokenizer=None, model_name=None, api_key=None, model_card=None,
  prompt_wrapper=None, instruction_following=False, context_window=2048,
  sample=False,max_output=100, temperature=0.0,
- get_logits=False, api_endpoint=None, device="GPU", **kwargs):
+ get_logits=False, api_endpoint=None, device="GPU",
+ pipeline="text2text", **kwargs):

  super().__init__()

@@ -3886,6 +3887,8 @@ def __init__(self, model=None, tokenizer=None, model_name=None, api_key=None, mo
  self.sample=sample
  self.get_logits=get_logits

+ self.pipeline = pipeline
+
  if get_logits:
  logger.warning(f"OVGenerativeModel - current implementation does not support "
  f"get_logits option.")
@@ -3921,6 +3924,9 @@ def __init__(self, model=None, tokenizer=None, model_name=None, api_key=None, mo
  if "cache_dir" in model_card:
  self.cache_dir = model_card["cache_dir"]

+ if "pipeline" in model_card:
+ self.pipeline = model_card["pipeline"]
+
  # insert dynamic openvino load here
  if not api_endpoint:

@@ -4086,7 +4092,8 @@ def __init__(self, model=None, tokenizer=None, model_name=None, api_key=None, mo

  self.post_init()

- def load_model_for_inference(self, loading_directions, model_card=None, **kwargs):
+ def load_model_for_inference(self, loading_directions,
+ model_card=None, pipeline=None, **kwargs):

  """ Loads OV Model from local path using loading directions. """

@@ -4095,10 +4102,16 @@ def load_model_for_inference(self, loading_directions, model_card=None, **kwargs
  self.model_repo_path = loading_directions
  if model_card:
  self.model_card = model_card
+ if "pipeline" in self.model_card:
+ self.pipeline = self.model_card["pipeline"]
+
+ if pipeline:
+ self.pipeline = pipeline

  self.validate()

- if self.device == "GPU" or self.optimize_for_gpu_if_available:
+ if self.device == "GPU" or (self.device == "CPU" and self.optimize_for_gpu_if_available):
+
  device = self.device_resolver()
  if device != self.device:
  # resets self.device to the resolved device
@@ -4123,45 +4136,16 @@ def load_model_for_inference(self, loading_directions, model_card=None, **kwargs

  # default is to cache to optimize performance on subsequent loads

- if self.cache:
- if self.cache_with_model:
- # will put the cache files co-located with the model assets
- path_to_cache_dir = loading_directions
- else:
- path_to_cache_dir = self.cache_custom
-
- if self.verbose_mode:
- logger.info(f"OVGenerativeModel - creating pipeline - "
- f"{self.device} - {self.cache} - {path_to_cache_dir}")
-
- try:
- #TODO: need to test safety of path_to_cache_dir input in LLMPipeline constructor
-
- self.pipe = ovg.LLMPipeline(loading_directions, self.device,
- {"CACHE_DIR": path_to_cache_dir})
-
- except:
- raise LLMWareException(message=f"OVGenerativeModel - attempt to instantiate LLMPipeline failed - "
- f"this could be for a number of reasons, including: "
- f"\n1. openvino and openvino_genai installs are not supported "
- f"on this os / hardware platform."
- f"\n2. the model could not found at path: {loading_directions}, or "
- f"\n3. the model may not a valid OpenVino format model.")
+ # build pipeline based on type
+ if self.pipeline == "text2image":
+ self.ov_text_to_image_pipeline()
  else:
-
- #TODO: confirm that empty plugin instructions with no caching will work on all platforms
- try:
- self.pipe = ovg.LLMPipeline(loading_directions, self.device, {})
- except:
- raise LLMWareException(message=f"OVGenerativeModel - attempt to instantiate LLMPipeline failed - "
- f"this could be for a number of reasons, including: "
- f"\n1. openvino and openvino_genai installs are not supported "
- f"on this os / hardware platform."
- f"\n2. the model could not found at path: {loading_directions}, or "
- f"\n3. the model may not a valid OpenVino format model.")
+ # default: text2text
+ self.ov_text_to_text_pipeline()

  if self.verbose_mode:
- logger.info("OVGenerativeModel - completed new pipe creation")
+ logger.info(f"OVGenerativeModel - completed new pipe creation - "
+ f"{self.pipeline}")

  return self

@@ -4221,6 +4205,98 @@ def load_ov_external_tokenizer(self):
  # if no tokenizer found, then falls back to default tokenizer for 'approximate' count
  self.tokenizer = Utilities().get_default_tokenizer()

+ def ov_text_to_text_pipeline(self):
+
+ """ Main entry point for instantiating models """
+
+ loading_directions = self.model_repo_path
+
+ global ovg
+
+ if self.cache:
+ if self.cache_with_model:
+ # will put the cache files co-located with the model assets
+ path_to_cache_dir = loading_directions
+ else:
+ path_to_cache_dir = self.cache_custom
+
+ if self.verbose_mode:
+ logger.info(f"OVGenerativeModel - creating pipeline - "
+ f"{self.device} - {self.cache} - {path_to_cache_dir}")
+
+ try:
+ #TODO: need to test safety of path_to_cache_dir input in LLMPipeline constructor
+
+ self.pipe = ovg.LLMPipeline(loading_directions, self.device,
+ {"CACHE_DIR": path_to_cache_dir})
+
+ except:
+ raise LLMWareException(message=f"OVGenerativeModel - attempt to instantiate LLMPipeline failed - "
+ f"this could be for a number of reasons, including: "
+ f"\n1. openvino and openvino_genai installs are not supported "
+ f"on this os / hardware platform."
+ f"\n2. the model could not found at path: {loading_directions}, or "
+ f"\n3. the model may not a valid OpenVino format model.")
+ else:
+
+ #TODO: confirm that empty plugin instructions with no caching will work on all platforms
+ try:
+ self.pipe = ovg.LLMPipeline(loading_directions, self.device, {})
+ except:
+ raise LLMWareException(message=f"OVGenerativeModel - attempt to instantiate LLMPipeline failed - "
+ f"this could be for a number of reasons, including: "
+ f"\n1. openvino and openvino_genai installs are not supported "
+ f"on this os / hardware platform."
+ f"\n2. the model could not found at path: {loading_directions}, or "
+ f"\n3. the model may not a valid OpenVino format model.")
+
+ return True
+
+ def ov_text_to_image_pipeline(self):
+
+ """ Model loading entry point for new OpenVINO text_to_image
+ pipeline for multimedia models that generate images from text prompt. """
+
+ global ovg
+
+ text_encoder_device = "GPU"
+ unet_device = "GPU"
+ vae_decoder_device = "GPU"
+
+ width = 512
+ height = 512
+
+ self.pipe = ovg.Text2ImagePipeline(self.model_repo_path)
+
+ self.pipe.reshape(1, height, width, self.pipe.get_generation_config().guidance_scale)
+ properties = {"CACHE_DIR": self.model_repo_path}
+
+ self.pipe.compile(text_encoder_device, unet_device, vae_decoder_device, config=properties)
+
+ return True
+
+ def text_to_image_gen(self, prompt, image_name):
+
+ """ Specialized generation function for image generating models. """
+
+ from PIL import Image
+
+ # experiment with different step numbers
+ # will expose as parameter in future releases
+
+ number_of_inference_steps_per_image = 10
+
+ tmp_path = LLMWareConfig().get_tmp_path()
+ img_path = os.path.join(tmp_path, str(image_name) + ".bmp")
+
+ image_tensor = self.pipe.generate(prompt,
+ num_inference_steps=number_of_inference_steps_per_image)
+
+ image = Image.fromarray(image_tensor.data[0])
+ image.save(img_path)
+
+ return img_path
+
  def ov_token_counter(self, text):

  """ Called twice in inference generation loop to get the input_token_count and
