@@ -3868,7 +3868,8 @@ class OVGenerativeModel(BaseModel):
     def __init__(self, model=None, tokenizer=None, model_name=None, api_key=None, model_card=None,
                  prompt_wrapper=None, instruction_following=False, context_window=2048,
                  sample=False, max_output=100, temperature=0.0,
-                 get_logits=False, api_endpoint=None, device="GPU", **kwargs):
+                 get_logits=False, api_endpoint=None, device="GPU",
+                 pipeline="text2text", **kwargs):

         super().__init__()

@@ -3886,6 +3887,8 @@ def __init__(self, model=None, tokenizer=None, model_name=None, api_key=None, mo
         self.sample = sample
         self.get_logits = get_logits

+        self.pipeline = pipeline
+
         if get_logits:
             logger.warning(f"OVGenerativeModel - current implementation does not support "
                            f"get_logits option.")
@@ -3921,6 +3924,9 @@ def __init__(self, model=None, tokenizer=None, model_name=None, api_key=None, mo
39213924 if "cache_dir" in model_card :
39223925 self .cache_dir = model_card ["cache_dir" ]
39233926
3927+ if "pipeline" in model_card :
3928+ self .pipeline = model_card ["pipeline" ]
3929+
39243930 # insert dynamic openvino load here
39253931 if not api_endpoint :
39263932
@@ -4086,7 +4092,8 @@ def __init__(self, model=None, tokenizer=None, model_name=None, api_key=None, mo

         self.post_init()

-    def load_model_for_inference(self, loading_directions, model_card=None, **kwargs):
+    def load_model_for_inference(self, loading_directions,
+                                 model_card=None, pipeline=None, **kwargs):

         """ Loads OV Model from local path using loading directions. """

@@ -4095,10 +4102,16 @@ def load_model_for_inference(self, loading_directions, model_card=None, **kwargs
         self.model_repo_path = loading_directions
         if model_card:
             self.model_card = model_card
+            if "pipeline" in self.model_card:
+                self.pipeline = self.model_card["pipeline"]
+
+        if pipeline:
+            self.pipeline = pipeline

         self.validate()

-        if self.device == "GPU" or self.optimize_for_gpu_if_available:
+        if self.device == "GPU" or (self.device == "CPU" and self.optimize_for_gpu_if_available):
+
             device = self.device_resolver()
             if device != self.device:
                 # resets self.device to the resolved device
@@ -4123,45 +4136,16 @@ def load_model_for_inference(self, loading_directions, model_card=None, **kwargs

         # default is to cache to optimize performance on subsequent loads

-        if self.cache:
-            if self.cache_with_model:
-                # will put the cache files co-located with the model assets
-                path_to_cache_dir = loading_directions
-            else:
-                path_to_cache_dir = self.cache_custom
-
-            if self.verbose_mode:
-                logger.info(f"OVGenerativeModel - creating pipeline - "
-                            f"{self.device} - {self.cache} - {path_to_cache_dir}")
-
-            try:
-                # TODO: need to test safety of path_to_cache_dir input in LLMPipeline constructor
-
-                self.pipe = ovg.LLMPipeline(loading_directions, self.device,
-                                            {"CACHE_DIR": path_to_cache_dir})
-
-            except:
-                raise LLMWareException(message=f"OVGenerativeModel - attempt to instantiate LLMPipeline failed - "
-                                               f"this could be for a number of reasons, including: "
-                                               f"\n1. openvino and openvino_genai installs are not supported "
-                                               f"on this os / hardware platform."
-                                               f"\n2. the model could not found at path: {loading_directions}, or "
-                                               f"\n3. the model may not a valid OpenVino format model.")
+        # build pipeline based on type
+        if self.pipeline == "text2image":
+            self.ov_text_to_image_pipeline()
         else:
-
-            # TODO: confirm that empty plugin instructions with no caching will work on all platforms
-            try:
-                self.pipe = ovg.LLMPipeline(loading_directions, self.device, {})
-            except:
-                raise LLMWareException(message=f"OVGenerativeModel - attempt to instantiate LLMPipeline failed - "
-                                               f"this could be for a number of reasons, including: "
-                                               f"\n1. openvino and openvino_genai installs are not supported "
-                                               f"on this os / hardware platform."
-                                               f"\n2. the model could not found at path: {loading_directions}, or "
-                                               f"\n3. the model may not a valid OpenVino format model.")
+            # default: text2text
+            self.ov_text_to_text_pipeline()

         if self.verbose_mode:
-            logger.info("OVGenerativeModel - completed new pipe creation")
+            logger.info(f"OVGenerativeModel - completed new pipe creation - "
+                        f"{self.pipeline}")

         return self

@@ -4221,6 +4205,98 @@ def load_ov_external_tokenizer(self):
         # if no tokenizer found, then falls back to default tokenizer for 'approximate' count
         self.tokenizer = Utilities().get_default_tokenizer()

+    def ov_text_to_text_pipeline(self):
+
+        """ Builds the OpenVINO GenAI LLMPipeline for text-to-text generation - the default pipeline type. """
+
+        loading_directions = self.model_repo_path
+
+        global ovg
+
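+        # model caching: cache files are either co-located with the model assets or
+        # written to a custom cache directory, per the config attributes below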
+        if self.cache:
+            if self.cache_with_model:
+                # will put the cache files co-located with the model assets
+                path_to_cache_dir = loading_directions
+            else:
+                path_to_cache_dir = self.cache_custom
+
+            if self.verbose_mode:
+                logger.info(f"OVGenerativeModel - creating pipeline - "
+                            f"{self.device} - {self.cache} - {path_to_cache_dir}")
+
+            try:
+                # TODO: need to test safety of path_to_cache_dir input in LLMPipeline constructor
+
+                self.pipe = ovg.LLMPipeline(loading_directions, self.device,
+                                            {"CACHE_DIR": path_to_cache_dir})
+
+            except:
+                raise LLMWareException(message=f"OVGenerativeModel - attempt to instantiate LLMPipeline failed - "
+                                               f"this could be for a number of reasons, including: "
+                                               f"\n1. openvino and openvino_genai installs are not supported "
+                                               f"on this os / hardware platform."
+                                               f"\n2. the model could not be found at path: {loading_directions}, or "
+                                               f"\n3. the model may not be a valid OpenVINO format model.")
+        else:
+
+            # TODO: confirm that empty plugin instructions with no caching will work on all platforms
+            try:
+                self.pipe = ovg.LLMPipeline(loading_directions, self.device, {})
+            except:
+                raise LLMWareException(message=f"OVGenerativeModel - attempt to instantiate LLMPipeline failed - "
+                                               f"this could be for a number of reasons, including: "
+                                               f"\n1. openvino and openvino_genai installs are not supported "
+                                               f"on this os / hardware platform."
+                                               f"\n2. the model could not be found at path: {loading_directions}, or "
+                                               f"\n3. the model may not be a valid OpenVINO format model.")
+
+        return True
+
+    def ov_text_to_image_pipeline(self):
+
+        """ Model loading entry point for the new OpenVINO text_to_image
+        pipeline for multimedia models that generate images from a text prompt. """
+
+        global ovg
+
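+        # per-stage target devices for the diffusion pipeline; hard-coded to GPU in this release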
+        text_encoder_device = "GPU"
+        unet_device = "GPU"
+        vae_decoder_device = "GPU"
+
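+        # output resolution is fixed at 512x512 in this implementation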
+        width = 512
+        height = 512
+
+        self.pipe = ovg.Text2ImagePipeline(self.model_repo_path)
+
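+        # reshape() fixes static input shapes (1 image per prompt, height x width, guidance scale)
+        # before compile() builds each stage (text encoder, unet, vae decoder) on its target device,
+        # using the model directory as the OpenVINO cache dir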
+        self.pipe.reshape(1, height, width, self.pipe.get_generation_config().guidance_scale)
+        properties = {"CACHE_DIR": self.model_repo_path}
+
+        self.pipe.compile(text_encoder_device, unet_device, vae_decoder_device, config=properties)
+
+        return True
+
+    def text_to_image_gen(self, prompt, image_name):
+
+        """ Specialized generation function for image-generating models. """
+
+        from PIL import Image
+
+        # experiment with different step numbers
+        # will expose as parameter in future releases
+
+        number_of_inference_steps_per_image = 10
+
+        tmp_path = LLMWareConfig().get_tmp_path()
+        img_path = os.path.join(tmp_path, str(image_name) + ".bmp")
+
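+        # generate() returns an image tensor batch; the first image is converted to PIL and saved as .bmp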
+        image_tensor = self.pipe.generate(prompt,
+                                          num_inference_steps=number_of_inference_steps_per_image)
+
+        image = Image.fromarray(image_tensor.data[0])
+        image.save(img_path)
+
+        return img_path
+
     def ov_token_counter(self, text):

         """ Called twice in inference generation loop to get the input_token_count and