app.py
import streamlit as st
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch


# Load the BLIP processor and model once; st.cache_resource keeps them
# in memory across Streamlit reruns so the weights are not reloaded
@st.cache_resource
def load_captioning_model():
    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
    return processor, model
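
# Optional GPU sketch (an assumption, not part of the original app): with a
# CUDA-capable torch build, the model could be moved to the GPU after loading,
# and generate_caption() below would then send its inputs to the same device:
#
#     device = "cuda" if torch.cuda.is_available() else "cpu"
#     model = model.to(device)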

# Generate a caption for the uploaded image
def generate_caption(image, processor, model):
    inputs = processor(images=image, return_tensors="pt").to("cpu")
    with torch.no_grad():
        outputs = model.generate(**inputs)
    caption = processor.decode(outputs[0], skip_special_tokens=True)
    return caption
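
# Sketch of an alternative decoding setup (an assumption, not in the original):
# Hugging Face's generate() accepts standard decoding kwargs, so beam search
# and a length cap on the caption could be requested like this:
#
#     outputs = model.generate(**inputs, num_beams=3, max_new_tokens=30)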

# Streamlit application
def main():
    st.title("Gemini Vision Pro - Image Captioning")

    # Center the upload widget with a small CSS override
    st.write(
        """
        <style>
        [data-testid="stFileUploader"] {
            margin: auto;
        }
        </style>
        """,
        unsafe_allow_html=True,
    )

    st.header("Upload Your Image")
    uploaded_image = st.file_uploader("Choose an image", type=["png", "jpg", "jpeg"])

    if uploaded_image:
        try:
            # Display the uploaded image
            image = Image.open(uploaded_image).convert("RGB")
            st.image(image, caption="Uploaded Image", use_column_width=True)

            # Load the captioning model
            with st.spinner("Loading captioning model..."):
                processor, model = load_captioning_model()

            # Generate the caption
            with st.spinner("Analyzing the image..."):
                caption = generate_caption(image, processor, model)

            st.success("Image Analysis Complete!")
            st.write("Generated Caption:")
            st.write(caption)
        except Exception as e:
            st.error(f"An error occurred: {e}")


if __name__ == "__main__":
    main()
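
# To run locally (assuming streamlit, torch, transformers, and pillow are
# installed): streamlit run app.py
# The first run downloads the BLIP weights from the Hugging Face Hub.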