VLMs guessing image segmentation strategies

VLMs guessing image segmentation strategies#

In this notebook we present images to vision-language models (VLMs) and ask them which algorithm to use for segmenting the image. One would expect the VLM to suggest different strategies depending on the image. In a second example, we demonstrate how a list of rules can be used to steer the VLM's recommendation.

import openai
import PIL
import stackview
from skimage import data
from skimage.io import imread
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import wordcloud

We will need some helper functions for assembling a prompt and submitting it to the openai server.

def prompt_with_image(message:str, image=None, model="gpt-4o-2024-11-20"):
    """Send a text message, optionally with an image, to OpenAI and return
    the text response.

    Parameters
    ----------
    message : str or list
        The prompt. A plain string is wrapped into a single user message;
        any other value is passed on unchanged and must therefore already
        be a list of chat messages.
    image : optional
        Image to attach. When given, it is converted with
        ``image_to_message`` and appended after the text message(s).
    model : str
        Name of the model to query.

    Returns
    -------
    The text content of the first choice in the server's response.
    """
    # convert message in the right format if necessary
    if isinstance(message, str):
        message = [{"role": "user", "content": message}]

    # the image is sent as its own user message following the text
    image_message = image_to_message(image) if image is not None else []

    # setup connection to the LLM
    client = openai.OpenAI()

    # submit prompt
    response = client.chat.completions.create(
        model=model,
        messages=message + image_message
    )

    # extract answer
    return response.choices[0].message.content


def image_to_message(image):
    """Wrap an image into a list holding one user chat message.

    The image is passed through stackview's ``_img_to_rgb`` helper
    (presumably yielding an RGB array - confirm against stackview),
    encoded with ``numpy_to_bytestream`` and embedded as a base64 data URL.

    Parameters
    ----------
    image
        The image to send; anything ``_img_to_rgb`` accepts.

    Returns
    -------
    list
        A single-element list with a ``{"role": "user", ...}`` message whose
        content is one ``image_url`` entry.
    """
    import base64

    from stackview._image_widget import _img_to_rgb

    rgb_image = _img_to_rgb(image)
    byte_stream = numpy_to_bytestream(rgb_image)
    base64_image = base64.b64encode(byte_stream).decode('utf-8')

    return [{"role": "user", "content": [{
        "type": "image_url",
        "image_url": {
            # numpy_to_bytestream writes PNG data, so the data URL must
            # declare image/png (it previously claimed image/jpeg)
            "url": f"data:image/png;base64,{base64_image}"
        }

    }]}]


def numpy_to_bytestream(data):
    """Encode a NumPy array as a PNG image and return the encoded bytes."""
    import io

    import numpy as np
    from PIL import Image

    # Cast to 8-bit, wrap the array as a PIL image and force RGBA mode
    as_uint8 = data.astype(np.uint8)
    pil_image = Image.fromarray(as_uint8)
    rgba_image = pil_image.convert("RGBA")

    # Write the PNG into an in-memory buffer
    buffer = io.BytesIO()
    rgba_image.save(buffer, format='PNG')

    # Hand back the complete buffer content
    return buffer.getvalue()

These are the example images we will be using.

# Pick a single 2D plane out of skimage's `cells3d` sample via index [30, 0]
# (presumably slice 30 of the membrane channel - confirm against the skimage docs).
membrane_image = data.cells3d()[30, 0]
stackview.insight(membrane_image)

shape	(256, 256)
dtype	uint16
size	128.0 kB
min	277
max	44092

# Crop rows 100-199 and the first 100 columns of skimage's `human_mitosis` sample.
nuclei_image = data.human_mitosis()[100:200,:100]
stackview.insight(nuclei_image)

shape	(100, 100)
dtype	uint8
size	9.8 kB
min	7
max	96

# Crop rows 330-429 and columns 355-454 of the same `human_mitosis` sample.
dense_nuclei_image = data.human_mitosis()[330:430, 355:455]
stackview.insight(dense_nuclei_image)

shape	(100, 100)
dtype	uint8
size	9.8 kB
min	8
max	188

# Load an image from a local file; the path is relative to the working directory.
hela_cells = imread("data/hela-cells-8bit.tif")
stackview.insight(hela_cells)

shape	(512, 672, 3)
dtype	uint8
size	1008.0 kB
min	0
max	255

This helper function sends the image together with a prompt to the LLM service provider multiple times (`num_samples`, 25 by default) and displays a word cloud of the suggested algorithms.

def determine_algorithm(prompt, image, num_samples=25):
    """Ask the model the same question repeatedly and show the answers as a word cloud.

    Parameters
    ----------
    prompt
        Prompt forwarded to ``prompt_with_image``.
    image
        Image forwarded to ``prompt_with_image``; may be None.
    num_samples : int
        Number of requests to send.
    """
    # Collect one answer per request
    answers = [prompt_with_image(prompt, image) for _ in range(num_samples)]

    # Lower-case each answer and drop filler words so that only the
    # method names remain
    cleaned = []
    for answer in answers:
        reduced = answer.lower()
        for filler in ("algorithm", "segmentation"):
            reduced = reduced.replace(filler, "")
        cleaned.append(reduced.strip())
    text = " ".join(cleaned)

    # Build the word cloud from the combined answers
    generator = wordcloud.WordCloud(
        width=800,
        height=400,
        colormap='viridis',
        background_color='white',
    )
    cloud = generator.generate(text)

    # Render it
    plt.imshow(cloud, interpolation='bilinear')
    plt.show()

First, we prompt for microscopy image segmentation algorithms with no details and no image.

# Baseline: a generic question, submitted without an image (second argument is None).
prompt = """You are a bioimage-analysis expert.
What is the best image processing algorithm to segment microscopy images?
Answer the algorithm name only. No explanations.
"""
determine_algorithm(prompt, None)

../_images/45fd74f770a1587e1591b74129f543932aa569a6d22451ad196aeca180d888e6.png

This is the simple prompt we submit to the server.

# Simple prompt that refers to "this microscopy image"; an image is attached to each request.
prompt = """You are a bioimage-analysis expert.
What is the best image processing algorithm to segment this microscopy image?
Answer the algorithm name only. No explanations.
"""

# Simple prompt + membrane image
determine_algorithm(prompt, membrane_image)

../_images/df5905754a4a31b4eed474d09e321bd213915e4e316a03615639daf8e2b1ab2c.png

# Simple prompt + nuclei crop
determine_algorithm(prompt, nuclei_image)

../_images/407b4690f622568b4c1c24204002c5a5863a3a730a245e8ff454c9bb29e72121.png

# Simple prompt + dense nuclei crop
determine_algorithm(prompt, dense_nuclei_image)

../_images/d8701cbdcdecc1760299f768c337ca493b6e912b2b3ee433090c382979f31aed.png

# Simple prompt + HeLa cells image
determine_algorithm(prompt, hela_cells)

../_images/6e30aa90a2c05a850b77487a035cb378b05d7515909674a19646e18c237aeaad.png

Next, we try the same strategy using a more complex prompt containing a list of rules to guide the VLM.

# Rule-based prompt: the same question, preceded by a list of rules that map
# image characteristics to algorithms.
# NOTE(review): "alogrithms" in the first line is a typo for "algorithms"; it is
# part of the text sent to the model, so it is left unchanged here.
prompt = """You are a bioimage-analysis expert. You have a rule-book what alogrithms to use for specific images.

## Rules

* If an image shows sparse objects such as nuclei, use Otsu-thresholding for segmenting them.
* If an image shows dense, partially overlapping objects such as nuclei, use StarDist.
* If an image shows large cell-like structures with bright membranes, use the Watershed algorithm.
* In case of doubt, use CellPose.

## The task

What is the best image processing algorithm to segment this microscopy image?
Answer the algorithm name only. No explanations.
"""

# Rule-based prompt + membrane image
determine_algorithm(prompt, membrane_image)

../_images/480f5e1d30ff38517b31a056975aec77c6ae3f57703ea12bfe5e2aab300db643.png

# Rule-based prompt + nuclei crop
determine_algorithm(prompt, nuclei_image)

../_images/f9a69f3c03975caae2dda197cc51333fe57029dfa0dd6f80d11cfafcb2bde967.png

# Rule-based prompt + dense nuclei crop
determine_algorithm(prompt, dense_nuclei_image)

../_images/912fe6678c6de52e599519662ce7174c6d44a84ebe279819ce30438eec92fba8.png

# Rule-based prompt + HeLa cells image
determine_algorithm(prompt, hela_cells)

../_images/7633d5777213fee3a829f33cd023e0edfabce39b0b6db4d93a8ddc23044065f2.png

Exercise#

Load a natural picture, e.g. one showing a cat, and ask the LLM how to process the image using both prompts above.

VLMs guessing image segmentation strategies

Contents

VLMs guessing image segmentation strategies#

Exercise#