Renaming Screenshots with GPT Vision
This notebook demonstrates how to use the vision capabilities of gpt-4-turbo
in combination with magentic's structured outputs to rename all those screenshots cluttering your desktop.
In [1]:
Copied!
# List all screenshots
from pathlib import Path

path_desktop = Path.home() / "Desktop"
# Directory listings come back in arbitrary, filesystem-dependent order.
# Sorting keeps the printed listing and `screenshot_paths[0]` (displayed in
# the next cell) reproducible from run to run.
screenshot_paths = sorted(path_desktop.glob("Screenshot*.png"))
for screenshot_path in screenshot_paths:
    print(screenshot_path.name)
# List all screenshots
from pathlib import Path

path_desktop = Path.home() / "Desktop"
# Directory listings come back in arbitrary, filesystem-dependent order.
# Sorting keeps the printed listing and `screenshot_paths[0]` (displayed in
# the next cell) reproducible from run to run.
screenshot_paths = sorted(path_desktop.glob("Screenshot*.png"))
for screenshot_path in screenshot_paths:
    print(screenshot_path.name)
Screenshot 2024-04-20 at 10.49.08 PM Small.png Screenshot 2024-04-20 at 10.50.04 PM Small.png Screenshot 2024-04-20 at 10.50.57 PM Small.png
In [2]:
Copied!
# Display the first screenshot
from IPython.display import Image, display


def display_image(image_path, width=400):
    """Render the image stored at ``image_path`` inline in the notebook.

    Args:
        image_path: Object exposing ``read_bytes()`` (e.g. a ``pathlib.Path``)
            that returns the encoded image data.
        width: Width forwarded to ``IPython.display.Image``. Defaults to 400,
            the value that used to be hard-coded.
    """
    display(Image(data=image_path.read_bytes(), width=width))


# Keep the original (misspelled) name working: the renaming loop later in this
# notebook calls `diplay_image`.
diplay_image = display_image

# Guard the index so a Desktop without screenshots produces a message instead
# of an IndexError.
if screenshot_paths:
    display_image(screenshot_paths[0])
else:
    print("No screenshots found.")
# Display the first screenshot
from IPython.display import Image, display


def display_image(image_path, width=400):
    """Render the image stored at ``image_path`` inline in the notebook.

    Args:
        image_path: Object exposing ``read_bytes()`` (e.g. a ``pathlib.Path``)
            that returns the encoded image data.
        width: Width forwarded to ``IPython.display.Image``. Defaults to 400,
            the value that used to be hard-coded.
    """
    display(Image(data=image_path.read_bytes(), width=width))


# Keep the original (misspelled) name working: the renaming loop later in this
# notebook calls `diplay_image`.
diplay_image = display_image

# Guard the index so a Desktop without screenshots produces a message instead
# of an IndexError.
if screenshot_paths:
    display_image(screenshot_paths[0])
else:
    print("No screenshots found.")
In [3]:
Copied!
# Structured result requested from the LLM for each screenshot.
# The description field is declared before the file name so the LLM can
# think about the image before naming the file.
from pydantic import BaseModel, Field


class ScreenshotDetails(BaseModel):
    # Both fields are required; `...` spells that out explicitly.
    description: str = Field(
        ...,
        description="A brief description of the screenshot, including details that will be useful for naming it.",
    )
    filename: str = Field(
        ...,
        description="An appropriate file name for this image, excluding the file extension.",
    )
# Structured result requested from the LLM for each screenshot.
# The description field is declared before the file name so the LLM can
# think about the image before naming the file.
from pydantic import BaseModel, Field


class ScreenshotDetails(BaseModel):
    # Both fields are required; `...` spells that out explicitly.
    description: str = Field(
        ...,
        description="A brief description of the screenshot, including details that will be useful for naming it.",
    )
    filename: str = Field(
        ...,
        description="An appropriate file name for this image, excluding the file extension.",
    )
In [4]:
Copied!
# Build the prompt-function that maps image bytes to ScreenshotDetails
from magentic import OpenaiChatModel, Placeholder, UserMessage, chatprompt
from magentic.vision import UserImageMessage

# Pieces of the chat prompt, named individually so the decorator stays short.
_VISION_MODEL = OpenaiChatModel("gpt-4-turbo")
_INSTRUCTION = UserMessage("Describe the screenshot, then provide a suitable file name.")
# The placeholder is named "image", matching the parameter of `describe_image`.
_SCREENSHOT = UserImageMessage(Placeholder(bytes, "image"))


@chatprompt(_INSTRUCTION, _SCREENSHOT, model=_VISION_MODEL)
def describe_image(image: bytes) -> ScreenshotDetails:
    ...
# Build the prompt-function that maps image bytes to ScreenshotDetails
from magentic import OpenaiChatModel, Placeholder, UserMessage, chatprompt
from magentic.vision import UserImageMessage

# Pieces of the chat prompt, named individually so the decorator stays short.
_VISION_MODEL = OpenaiChatModel("gpt-4-turbo")
_INSTRUCTION = UserMessage("Describe the screenshot, then provide a suitable file name.")
# The placeholder is named "image", matching the parameter of `describe_image`.
_SCREENSHOT = UserImageMessage(Placeholder(bytes, "image"))


@chatprompt(_INSTRUCTION, _SCREENSHOT, model=_VISION_MODEL)
def describe_image(image: bytes) -> ScreenshotDetails:
    ...
In [5]:
Copied!
# Rename all screenshots using the prompt-function
def _unique_target(source_path, proposed_stem):
    """Return a safe, collision-free sibling of ``source_path`` for the rename.

    Args:
        source_path: ``pathlib.Path`` of the file about to be renamed.
        proposed_stem: File name (without extension) suggested by the model.

    Returns:
        A path in the same directory with the same suffix. ``source_path``
        itself is returned when the suggestion is empty after clean-up.
    """
    # The stem is model output, so treat it as untrusted: a path separator
    # would make `with_stem` raise ValueError.
    stem = proposed_stem.replace("/", "-").replace("\\", "-").strip()
    if not stem:
        return source_path
    candidate = source_path.with_stem(stem)
    counter = 1
    # `Path.rename` silently replaces an existing file on Unix, so pick a
    # numbered variant rather than overwriting another file.
    while candidate != source_path and candidate.exists():
        counter += 1
        candidate = source_path.with_stem(f"{stem}-{counter}")
    return candidate


# Snapshot (and sort) the matches before renaming anything: renaming files
# while lazily iterating a glob over the same directory is unreliable.
for path_screenshot in sorted(path_desktop.glob("Screenshot*.png")):
    print(path_screenshot.name)
    diplay_image(path_screenshot)
    image_bytes = path_screenshot.read_bytes()
    image_details = describe_image(image_bytes)
    print(image_details.description)
    new_path = _unique_target(path_screenshot, image_details.filename)
    # Skip the no-op rename when the file keeps its current name.
    if new_path != path_screenshot:
        path_screenshot.rename(new_path)
    print("\nRenamed to:", new_path.name)
    print("\n\n---\n\n")
# Rename all screenshots using the prompt-function
def _unique_target(source_path, proposed_stem):
    """Return a safe, collision-free sibling of ``source_path`` for the rename.

    Args:
        source_path: ``pathlib.Path`` of the file about to be renamed.
        proposed_stem: File name (without extension) suggested by the model.

    Returns:
        A path in the same directory with the same suffix. ``source_path``
        itself is returned when the suggestion is empty after clean-up.
    """
    # The stem is model output, so treat it as untrusted: a path separator
    # would make `with_stem` raise ValueError.
    stem = proposed_stem.replace("/", "-").replace("\\", "-").strip()
    if not stem:
        return source_path
    candidate = source_path.with_stem(stem)
    counter = 1
    # `Path.rename` silently replaces an existing file on Unix, so pick a
    # numbered variant rather than overwriting another file.
    while candidate != source_path and candidate.exists():
        counter += 1
        candidate = source_path.with_stem(f"{stem}-{counter}")
    return candidate


# Snapshot (and sort) the matches before renaming anything: renaming files
# while lazily iterating a glob over the same directory is unreliable.
for path_screenshot in sorted(path_desktop.glob("Screenshot*.png")):
    print(path_screenshot.name)
    diplay_image(path_screenshot)
    image_bytes = path_screenshot.read_bytes()
    image_details = describe_image(image_bytes)
    print(image_details.description)
    new_path = _unique_target(path_screenshot, image_details.filename)
    # Skip the no-op rename when the file keeps its current name.
    if new_path != path_screenshot:
        path_screenshot.rename(new_path)
    print("\nRenamed to:", new_path.name)
    print("\n\n---\n\n")