In [1]:
import os
import io

from IPython.display import Image
import base64
import sys

# Add the parent directory (relative to the current working directory) to the
# module search path. This must run before the `alchemy` import below —
# presumably `alchemy` is a project-local module one level up; confirm.
sys.path.append("../")

from dotenv import load_dotenv, find_dotenv

# The result of load_dotenv is deliberately discarded by binding it to `_`.
_ = load_dotenv(find_dotenv())  # read local .env file
from alchemy import AlchemyAPI

# Indexing os.environ raises KeyError right here if HF_API_KEY is not set,
# rather than failing later when the key is first used.
hf_api_key = os.environ["HF_API_KEY"]

In [3]:
# Identifier of the captioning model on the Hugging Face hub.
MODEL = "Salesforce/blip-image-captioning-base"
# Endpoint URL: the base taken from the HF_API_BASE environment variable,
# a "/" separator, then the model identifier.
HF_API = "/".join((os.environ["HF_API_BASE"], MODEL))
# Bare expression so the notebook displays the resolved URL.
HF_API

'https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-base'

In [4]:
# Module-level client used by the cells below; built from the API key and the
# model endpoint URL defined in the earlier cells. AlchemyAPI comes from the
# local `alchemy` import, so its constructor contract is not visible here.
alchemyapi = AlchemyAPI(hf_api_key, HF_API)

In [5]:
# Remote sample image used to try the model before the UI is built.
alchemyImage1 = "https://free-images.com/sm/9596/dog_animal_greyhound_983023.jpg"
# Show the image inline (loaded by URL) so the generated caption can be checked by eye.
display(Image(url=alchemyImage1))

In [6]:
# Caption the sample image; the bare expression makes the notebook display the response.
# NOTE(review): this passes an image URL, whereas captioner() further down passes a
# base64 string — presumably invoke_model accepts both; confirm in the `alchemy` module.
alchemyapi.invoke_model(alchemyImage1)

[{'generated_text': 'a dog wearing a santa hat and a red scarf'}]

In [8]:
import gradio as gr


def image_to_base64_str(pil_image):
    """Serialize an image to PNG in memory and return it as base64 text.

    Args:
        pil_image: Object exposing ``save(fp, format=...)``; it is asked to
            write itself into an in-memory buffer with ``format="PNG"``.

    Returns:
        str: Base64 encoding of the written bytes, decoded as UTF-8.
    """
    with io.BytesIO() as buffer:
        pil_image.save(buffer, format="PNG")
        png_bytes = buffer.getvalue()
    encoded = base64.b64encode(png_bytes)
    return encoded.decode("utf-8")


def captioner(image):
    """Generate a caption for an image using the module-level ``alchemyapi`` client.

    The image is converted with ``image_to_base64_str`` and the resulting
    string is passed to ``alchemyapi.invoke_model``.

    Args:
        image: Image object accepted by ``image_to_base64_str`` (the Gradio
            input in this notebook is configured with ``type="pil"``).
            ``None`` is rejected — presumably what Gradio passes when the user
            submits without uploading an image.

    Returns:
        The ``"generated_text"`` value of the first entry of the model response.

    Raises:
        ValueError: If ``image`` is ``None``.
        RuntimeError: If the response is not a non-empty sequence whose first
            entry contains ``"generated_text"`` (for example an error payload).
    """
    if image is None:
        raise ValueError(
            "No image provided: upload an image before requesting a caption."
        )

    base64_image = image_to_base64_str(image)
    result = alchemyapi.invoke_model(base64_image)

    # A successful call looks like [{"generated_text": "..."}]. Anything else
    # (error dict, empty list, None, ...) is reported together with the payload
    # instead of failing with a bare KeyError/IndexError/TypeError.
    try:
        return result[0]["generated_text"]
    except (KeyError, IndexError, TypeError) as exc:
        raise RuntimeError(
            f"Unexpected response from captioning model: {result!r}"
        ) from exc


# close_all() runs before a new interface is built — presumably to release
# servers left over from earlier executions of this cell.
gr.close_all()

# Single-image-in, single-text-out UI around captioner(). The image component
# uses type="pil", which is the object captioner() hands to image_to_base64_str.
demo = gr.Interface(
    captioner,
    title="Image Captioning with BLIP",
    description="Caption any image using the BLIP model",
    inputs=[gr.Image(label="Upload image", type="pil")],
    outputs=[gr.Textbox(label="Caption")],
    examples=["../assets/profile.png"],
    allow_flagging="never",
)

# Start the app; the cell output shows the local URL it is served on.
demo.launch()

Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.


