import gradio as gr
from transformers import pipeline

# Image-captioning pipeline (BLIP large)
caption_pipeline = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")

# Facial-emotion classification pipeline
emotion_pipeline = pipeline("image-classification", model="RickyIG/emotion_face_image_classification_v3")

# Object-detection pipeline (DETR with a ResNet-50 backbone)
object_pipeline = pipeline("object-detection", model="facebook/detr-resnet-50")

def generate_caption_emotion_and_objects(image):
    # Caption the image and keep the generated text
    caption_result = caption_pipeline(image)
    caption = caption_result[0]["generated_text"]

    # Classify emotions; format each label with its confidence score
    emotion_result = emotion_pipeline(image)
    emotions = ", ".join([f"{res['label']}: {res['score']:.2f}" for res in emotion_result])

    # Detect objects; format each label with its confidence score
    object_result = object_pipeline(image)
    objects = ", ".join([f"{obj['label']}: {obj['score']:.2f}" for obj in object_result])

    # Combine the three results into a single text block
    combined_result = f"Caption: {caption}\nEmotions: {emotions}\nObjects: {objects}"
    return combined_result

# Build and launch the Gradio interface
interface = gr.Interface(fn=generate_caption_emotion_and_objects,
                         inputs=gr.components.Image(type="pil", label="Upload an Image"),
                         outputs=gr.components.Textbox(label="Generated Caption, Emotions, and Objects Detected"))
interface.launch()
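# For a quick sanity check without the web UI, the function can also be called
# directly on a PIL image. This is only a sketch: "example.jpg" is a hypothetical
# local file, and the call should be made before (or instead of) interface.launch(),
# which blocks while the Gradio server is running.
#
#   from PIL import Image
#   print(generate_caption_emotion_and_objects(Image.open("example.jpg")))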