@@ -3868,7 +3868,8 @@ class OVGenerativeModel(BaseModel):
     def __init__(self, model=None, tokenizer=None, model_name=None, api_key=None, model_card=None,
                  prompt_wrapper=None, instruction_following=False, context_window=2048,
                  sample=False, max_output=100, temperature=0.0,
-                 get_logits=False, api_endpoint=None, device="GPU", **kwargs):
+                 get_logits=False, api_endpoint=None, device="GPU",
+                 pipeline="text2text", **kwargs):

         super().__init__()

@@ -3886,6 +3887,8 @@ def __init__(self, model=None, tokenizer=None, model_name=None, api_key=None, mo
         self.sample = sample
         self.get_logits = get_logits

+        self.pipeline = pipeline
+
         if get_logits:
             logger.warning(f"OVGenerativeModel - current implementation does not support "
                            f"get_logits option.")
@@ -3921,6 +3924,9 @@ def __init__(self, model=None, tokenizer=None, model_name=None, api_key=None, mo
39213924 if "cache_dir" in model_card :
39223925 self .cache_dir = model_card ["cache_dir" ]
39233926
3927+ if "pipeline" in model_card :
3928+ self .pipeline = model_card ["pipeline" ]
3929+
39243930 # insert dynamic openvino load here
39253931 if not api_endpoint :
39263932
@@ -4086,7 +4092,8 @@ def __init__(self, model=None, tokenizer=None, model_name=None, api_key=None, mo

         self.post_init()

-    def load_model_for_inference(self, loading_directions, model_card=None, **kwargs):
+    def load_model_for_inference(self, loading_directions,
+                                 model_card=None, pipeline=None, **kwargs):

         """ Loads OV Model from local path using loading directions. """

@@ -4095,10 +4102,16 @@ def load_model_for_inference(self, loading_directions, model_card=None, **kwargs
         self.model_repo_path = loading_directions
         if model_card:
             self.model_card = model_card
+            if "pipeline" in self.model_card:
+                self.pipeline = self.model_card["pipeline"]
+
+        if pipeline:
+            self.pipeline = pipeline

         self.validate()

-        if self.device == "GPU" or self.optimize_for_gpu_if_available:
+        if self.device == "GPU" or (self.device == "CPU" and self.optimize_for_gpu_if_available):
+
             device = self.device_resolver()
             if device != self.device:
                 # resets self.device to the resolved device
@@ -4123,45 +4136,16 @@ def load_model_for_inference(self, loading_directions, model_card=None, **kwargs

         # default is to cache to optimize performance on subsequent loads

-        if self.cache:
-            if self.cache_with_model:
-                # will put the cache files co-located with the model assets
-                path_to_cache_dir = loading_directions
-            else:
-                path_to_cache_dir = self.cache_custom
-
-            if self.verbose_mode:
-                logger.info(f"OVGenerativeModel - creating pipeline - "
-                            f"{self.device} - {self.cache} - {path_to_cache_dir}")
-
-            try:
-                # TODO: need to test safety of path_to_cache_dir input in LLMPipeline constructor
-
-                self.pipe = ovg.LLMPipeline(loading_directions, self.device,
-                                            {"CACHE_DIR": path_to_cache_dir})
-
-            except:
-                raise LLMWareException(message=f"OVGenerativeModel - attempt to instantiate LLMPipeline failed - "
-                                               f"this could be for a number of reasons, including: "
-                                               f"\n1. openvino and openvino_genai installs are not supported "
-                                               f"on this os / hardware platform."
-                                               f"\n2. the model could not found at path: {loading_directions}, or "
-                                               f"\n3. the model may not a valid OpenVino format model.")
+        # build pipeline based on type
+        if self.pipeline == "text2image":
+            self.ov_text_to_image_pipeline()
         else:
-
-            # TODO: confirm that empty plugin instructions with no caching will work on all platforms
-            try:
-                self.pipe = ovg.LLMPipeline(loading_directions, self.device, {})
-            except:
-                raise LLMWareException(message=f"OVGenerativeModel - attempt to instantiate LLMPipeline failed - "
-                                               f"this could be for a number of reasons, including: "
-                                               f"\n1. openvino and openvino_genai installs are not supported "
-                                               f"on this os / hardware platform."
-                                               f"\n2. the model could not found at path: {loading_directions}, or "
-                                               f"\n3. the model may not a valid OpenVino format model.")
+            # default: text2text
+            self.ov_text_to_text_pipeline()

         if self.verbose_mode:
-            logger.info("OVGenerativeModel - completed new pipe creation")
+            logger.info(f"OVGenerativeModel - completed new pipe creation - "
+                        f"{self.pipeline}")

         return self

@@ -4221,6 +4205,98 @@ def load_ov_external_tokenizer(self):
         # if no tokenizer found, then falls back to default tokenizer for 'approximate' count
         self.tokenizer = Utilities().get_default_tokenizer()

+    def ov_text_to_text_pipeline(self):
+
+        """ Builds the OpenVINO GenAI LLMPipeline for text-to-text generation - the default pipeline type. """
+
+        loading_directions = self.model_repo_path
+
+        global ovg
+
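+        # model caching: cache files are either co-located with the model assets or
+        # written to a custom cache directory, per the config attributes below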
+        if self.cache:
+            if self.cache_with_model:
+                # will put the cache files co-located with the model assets
+                path_to_cache_dir = loading_directions
+            else:
+                path_to_cache_dir = self.cache_custom
+
+            if self.verbose_mode:
+                logger.info(f"OVGenerativeModel - creating pipeline - "
+                            f"{self.device} - {self.cache} - {path_to_cache_dir}")
+
+            try:
+                # TODO: need to test safety of path_to_cache_dir input in LLMPipeline constructor
+
+                self.pipe = ovg.LLMPipeline(loading_directions, self.device,
+                                            {"CACHE_DIR": path_to_cache_dir})
+
+            except:
+                raise LLMWareException(message=f"OVGenerativeModel - attempt to instantiate LLMPipeline failed - "
+                                               f"this could be for a number of reasons, including: "
+                                               f"\n1. openvino and openvino_genai installs are not supported "
+                                               f"on this os / hardware platform."
+                                               f"\n2. the model could not be found at path: {loading_directions}, or "
+                                               f"\n3. the model may not be a valid OpenVINO format model.")
+        else:
+
+            # TODO: confirm that empty plugin instructions with no caching will work on all platforms
+            try:
+                self.pipe = ovg.LLMPipeline(loading_directions, self.device, {})
+            except:
+                raise LLMWareException(message=f"OVGenerativeModel - attempt to instantiate LLMPipeline failed - "
+                                               f"this could be for a number of reasons, including: "
+                                               f"\n1. openvino and openvino_genai installs are not supported "
+                                               f"on this os / hardware platform."
+                                               f"\n2. the model could not be found at path: {loading_directions}, or "
+                                               f"\n3. the model may not be a valid OpenVINO format model.")
+
+        return True
+
+    def ov_text_to_image_pipeline(self):
+
+        """ Model loading entry point for the new OpenVINO text_to_image
+        pipeline for multimedia models that generate images from a text prompt. """
+
+        global ovg
+
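+        # per-stage target devices for the diffusion pipeline; hard-coded to GPU in this release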
+        text_encoder_device = "GPU"
+        unet_device = "GPU"
+        vae_decoder_device = "GPU"
+
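+        # output resolution is fixed at 512x512 in this implementation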
+        width = 512
+        height = 512
+
+        self.pipe = ovg.Text2ImagePipeline(self.model_repo_path)
+
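+        # reshape() fixes static input shapes (1 image per prompt, height x width, guidance scale)
+        # before compile() builds each stage (text encoder, unet, vae decoder) on its target device,
+        # using the model directory as the OpenVINO cache dir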
+        self.pipe.reshape(1, height, width, self.pipe.get_generation_config().guidance_scale)
+        properties = {"CACHE_DIR": self.model_repo_path}
+
+        self.pipe.compile(text_encoder_device, unet_device, vae_decoder_device, config=properties)
+
+        return True
+
+    def text_to_image_gen(self, prompt, image_name):
+
+        """ Specialized generation function for image-generating models. """
+
+        from PIL import Image
+
+        # experiment with different step numbers
+        # will expose as parameter in future releases
+
+        number_of_inference_steps_per_image = 10
+
+        tmp_path = LLMWareConfig().get_tmp_path()
+        img_path = os.path.join(tmp_path, str(image_name) + ".bmp")
+
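+        # generate() returns an image tensor batch; the first image is converted to PIL and saved as .bmp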
+        image_tensor = self.pipe.generate(prompt,
+                                          num_inference_steps=number_of_inference_steps_per_image)
+
+        image = Image.fromarray(image_tensor.data[0])
+        image.save(img_path)
+
+        return img_path
+
     def ov_token_counter(self, text):

         """ Called twice in inference generation loop to get the input_token_count and