moondream2_a135904583256596.../handler.py

from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import torch
from io import BytesIO
import base64

class EndpointHandler:
    """Request handler that answers a question about an image with moondream2.

    An instance loads the model and tokenizer once, then is called with a
    request payload containing a base64-encoded image and an optional
    question. The result is returned as a dict with ``statusCode`` and
    ``body`` keys.
    """

    def __init__(self, model_dir):
        """Load the model and tokenizer and move the model to GPU if available.

        Args:
            model_dir: Accepted for interface compatibility with the caller
                but not used; weights are always loaded via ``self.model_id``.
        """
        self.model_id = "vikhyatk/moondream2"
        # NOTE(review): trust_remote_code=True executes Python code shipped in
        # the model repository; consider pinning `revision=` to an audited
        # commit so an upstream change cannot alter what runs here.
        self.model = AutoModelForCausalLM.from_pretrained(self.model_id, trust_remote_code=True)
        # Use the same identifier as the model so the two can never drift apart.
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True)

        # Check if CUDA (GPU support) is available and then set the device to GPU or CPU
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

    def preprocess_image(self, encoded_image):
        """Decode a base64-encoded image into an RGB PIL image.

        Args:
            encoded_image: Base64 text (or bytes) of an image file. A string
                of the form ``data:<mime>;base64,<payload>`` is also accepted.

        Returns:
            The decoded image converted to RGB mode.

        Raises:
            Whatever ``base64.b64decode`` or ``Image.open`` raise for input
            that is not valid base64 or not a readable image.
        """
        # A data-URI header is not base64; without validation b64decode would
        # silently treat its letters as payload and corrupt the image bytes.
        if isinstance(encoded_image, str) and encoded_image.startswith("data:"):
            encoded_image = encoded_image.partition(",")[2]
        decoded_image = base64.b64decode(encoded_image)
        img = Image.open(BytesIO(decoded_image)).convert("RGB")
        return img

    def __call__(self, data):
        """Handle one request and return a status/body response dict.

        Args:
            data: Mapping with the request payload. The fields are read from
                ``data["inputs"]`` when that key exists, otherwise from
                ``data`` itself. ``image`` (base64 image) is required;
                ``question`` is optional and defaults to
                ``"move to the red ball"``.

        Returns:
            ``{"statusCode": 200, "body": {"answer": ...}}`` on success, or
            ``{"statusCode": 500, "body": {"error": "<message>"}}`` if any
            step raises. Exceptions are never propagated to the caller.
        """
        try:
            # Read without popping so the caller's request dict is not mutated.
            inputs = data.get("inputs", data)
            input_image = inputs['image']
            question = inputs.get('question', "move to the red ball")

            # Preprocess the image
            img = self.preprocess_image(input_image)

            # Inference only: no gradients are needed, so skip autograd bookkeeping.
            with torch.no_grad():
                enc_image = self.model.encode_image(img).to(self.device)
                answer = self.model.answer_question(enc_image, question, self.tokenizer)

            # A tensor is not JSON-serializable; convert it to plain Python lists.
            if isinstance(answer, torch.Tensor):
                answer = answer.detach().cpu().tolist()

            return {
                "statusCode": 200,
                "body": {
                    "answer": answer,
                },
            }
        except Exception as e:
            # Top-level boundary: report the failure in the response instead of raising.
            return {
                "statusCode": 500,
                "body": {
                    "error": str(e),
                },
            }
first commit 2024-11-14 10:00:20 +08:00			`from transformers import AutoModelForCausalLM, AutoTokenizer`
			`from PIL import Image`
			`import torch`
			`from io import BytesIO`
			`import base64`

			`class EndpointHandler:`
			`def __init__(self, model_dir):`
			`self.model_id = "vikhyatk/moondream2"`
			`self.model = AutoModelForCausalLM.from_pretrained(self.model_id, trust_remote_code=True)`
			`self.tokenizer = AutoTokenizer.from_pretrained("vikhyatk/moondream2", trust_remote_code=True)`

			`# Check if CUDA (GPU support) is available and then set the device to GPU or CPU`
			`self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")`
			`self.model.to(self.device)`

			`def preprocess_image(self, encoded_image):`
			`"""Decode and preprocess the input image."""`
			`decoded_image = base64.b64decode(encoded_image)`
			`img = Image.open(BytesIO(decoded_image)).convert("RGB")`
			`return img`

			`def __call__(self, data):`
			`"""Handle the incoming request."""`
			`try:`
			`# Extract the inputs from the data`
			`inputs = data.pop("inputs", data)`
			`input_image = inputs['image']`
			`question = inputs.get('question', "move to the red ball")`

			`# Preprocess the image`
			`img = self.preprocess_image(input_image)`

			`# Perform inference`
			`enc_image = self.model.encode_image(img).to(self.device)`
			`answer = self.model.answer_question(enc_image, question, self.tokenizer)`

			`# If the output is a tensor, move it back to CPU and convert to list`
			`if isinstance(answer, torch.Tensor):`
			`answer = answer.cpu().numpy().tolist()`

			`# Create the response`
			`response = {`
			`"statusCode": 200,`
			`"body": {`
			`"answer": answer`
			`}`
			`}`
			`return response`
			`except Exception as e:`
			`# Handle any errors`
			`response = {`
			`"statusCode": 500,`
			`"body": {`
			`"error": str(e)`
			`}`
			`}`
			`return response`