VLMs writing image analysis code

In this notebook we present images to a VLM and ask it to write image analysis code. One could expect that depending on the image, the VLM suggests different strategies. In a second example, we demonstrate how a list of rules can be used to guide the VLM in using state-of-the-art algorithms depending on image characteristics.

import openai
import PIL
import stackview
from skimage import data
from skimage.io import imread
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import wordcloud

We will need some helper functions for assembling a prompt and submitting it to the OpenAI server.

def prompt_with_image(message:str, image=None, model="gpt-4.1-2025-04-14"):
    """Send a text prompt, optionally with an image, to OpenAI and return the reply.

    Parameters
    ----------
    message : str or list of dict
        The prompt. A plain string is wrapped into a single user message;
        any other value is passed on unchanged and is expected to already be
        a list of chat messages.
    image : optional
        Image to attach. When given, it is converted with ``image_to_message``
        and appended after the text message(s).
    model : str
        Name of the chat model to query.

    Returns
    -------
    The text content of the first choice in the response.
    """
    # convert message in the right format if necessary
    if isinstance(message, str):
        message = [{"role": "user", "content": message}]

    # the image, if any, travels as an additional user message
    image_message = image_to_message(image) if image is not None else []

    # setup connection to the LLM (no credentials are passed explicitly here)
    client = openai.OpenAI()

    # submit prompt; concatenation builds a new list, the caller's list is untouched
    response = client.chat.completions.create(
        model=model,
        messages=message + image_message
    )

    # extract answer
    return response.choices[0].message.content


def image_to_message(image):
    """Wrap an image into a chat message that carries it as a base64 data URL.

    The image is converted to RGB with stackview's ``_img_to_rgb``, encoded
    with ``numpy_to_bytestream`` and base64-encoded.

    Parameters
    ----------
    image
        The image to send; it must be accepted by ``_img_to_rgb``.

    Returns
    -------
    list of dict
        A list holding one user message whose content is a single
        ``image_url`` entry.
    """
    import base64

    from stackview._image_widget import _img_to_rgb

    rgb_image = _img_to_rgb(image)
    byte_stream = numpy_to_bytestream(rgb_image)
    base64_image = base64.b64encode(byte_stream).decode('utf-8')

    return [{"role": "user", "content": [{
        "type": "image_url",
        "image_url": {
            # numpy_to_bytestream encodes PNG, so the media type has to be
            # image/png (it was previously declared as image/jpeg)
            "url": f"data:image/png;base64,{base64_image}"
        }

    }]}]


def numpy_to_bytestream(data):
    """Encode a NumPy image array as PNG and return the encoded bytes."""
    import numpy as np
    from PIL import Image
    import io

    # cast to 8-bit, wrap as a PIL image and force an RGBA pixel layout
    as_uint8 = data.astype(np.uint8)
    rgba_image = Image.fromarray(as_uint8).convert("RGBA")

    # encode into an in-memory buffer and hand back everything written to it
    with io.BytesIO() as buffer:
        rgba_image.save(buffer, format='PNG')
        return buffer.getvalue()

These are the example images we will be using.

# crop rows 100-199 and columns 0-99 from the human_mitosis sample image
sparse_nuclei_image = data.human_mitosis()[100:200,:100]
# display the crop
stackview.insight(sparse_nuclei_image)
shape: (100, 100)
dtype: uint8
size: 9.8 kB
min: 7
max: 96
# crop rows 330-429 and columns 355-454 from the human_mitosis sample image
dense_nuclei_image = data.human_mitosis()[330:430, 355:455]
# display the crop
stackview.insight(dense_nuclei_image)
shape: (100, 100)
dtype: uint8
size: 9.8 kB
min: 8
max: 188

This helper function will send the image together with a prompt to the LLM service provider and display either the code example (if num_samples=1) or a bar plot showing how often selected keywords / algorithm names occur in the generated code.

import matplotlib.pyplot as plt
from collections import Counter

def _extract_code(response):
    """Return the text between the first pair of ``` fences in *response*.

    Falls back to the complete response when it contains no fence, and to an
    empty string when the response is ``None``.
    """
    if response is None:
        return ""
    parts = response.split("```")
    return parts[1] if len(parts) > 1 else response


def determine_algorithm(prompt, image, num_samples=25):
    """Query the model repeatedly and summarise which algorithms its code uses.

    The prompt and image are sent ``num_samples`` times via
    ``prompt_with_image`` and the first fenced code block of every reply is
    extracted.

    Parameters
    ----------
    prompt : str
        Text prompt sent with every request.
    image
        Image sent with every request.
    num_samples : int
        Number of requests. With ``num_samples=1`` the extracted code is
        printed and nothing is plotted. Otherwise a bar plot shows in how
        many replies each of the keywords ``threshold``, ``watershed``,
        ``stardist`` and ``cellpose`` occurs.

    Returns
    -------
    None
    """
    code_snippets = [
        _extract_code(prompt_with_image(prompt, image))
        for _ in range(num_samples)
    ]

    if num_samples == 1:
        # print the code as generated; lowercasing it would break
        # case-sensitive identifiers in the displayed code
        print(code_snippets[0])
        return

    # Count in how many responses each algorithm name occurs. Every keyword
    # starts at 0 so the plot always shows the same categories, in this order.
    keywords = ('threshold', 'watershed', 'stardist', 'cellpose')
    counts = Counter({keyword: 0 for keyword in keywords})

    for snippet in code_snippets:
        # match case-insensitively, each keyword at most once per response
        lowered = snippet.lower()
        counts.update(keyword for keyword in keywords if keyword in lowered)

    # Display bar plot
    plt.bar(list(counts.keys()), list(counts.values()),
            color=['blue', 'orange', 'green', 'magenta'])
    plt.ylabel('Number of responses', fontsize=18)
    plt.title('Occurrence of algorithm names in code', fontsize=18)
    plt.xticks(fontsize=18)
    plt.yticks(fontsize=18)
    plt.ylim(0, num_samples)
    plt.show()

First, we prompt for microscopy image segmentation code using both example images once to see example code that gets generated.

# Baseline prompt without any algorithm guidance.
# With num_samples=1, determine_algorithm prints one generated code example
# instead of plotting.
prompt = """You are a bioimage-analysis expert and excellent Python programmer.
First, describe the image in one sentence, then write Python code for segmenting this image. 
"""
determine_algorithm(prompt, sparse_nuclei_image, num_samples=1)
python
import matplotlib.pyplot as plt
from skimage import io, filters, morphology, measure
from skimage.filters import threshold_otsu
from scipy import ndimage as ndi

# load the image
image = io.imread('your_image_filename.png')  # replace with your file path

# apply a gaussian filter to smooth out the noise
smoothed = filters.gaussian(image, sigma=1)

# compute otsu's threshold
thresh = threshold_otsu(smoothed)

# binarize the image
binary = smoothed > thresh

# remove small artifacts
cleaned = morphology.remove_small_objects(binary, min_size=30)

# fill small holes inside nuclei
filled = ndi.binary_fill_holes(cleaned)

# label connected components
label_image = measure.label(filled)

# display results
fig, axes = plt.subplots(1, 3, figsize=(12, 4))
axes[0].imshow(image, cmap='gray')
axes[0].set_title('original')
axes[1].imshow(filled, cmap='gray')
axes[1].set_title('segmented mask')
axes[2].imshow(image, cmap='gray')
axes[2].imshow(label_image, cmap='nipy_spectral', alpha=0.5)
axes[2].set_title('labels overlay')
for ax in axes:
    ax.axis('off')
plt.tight_layout()
plt.show()
determine_algorithm(prompt, dense_nuclei_image, num_samples=1)
python
import numpy as np
import matplotlib.pyplot as plt
from skimage import io, filters, measure, morphology
from scipy import ndimage as ndi

# load the image (replace 'nuclei.png' with your actual filename if needed)
img = io.imread('nuclei.png')

# apply gaussian filter to reduce noise
smoothed = filters.gaussian(img, sigma=1)

# threshold the image to create a binary mask
thresh = filters.threshold_otsu(smoothed)
binary = smoothed > thresh

# remove small objects and fill holes
cleaned = morphology.remove_small_objects(binary, min_size=30)
cleaned = ndi.binary_fill_holes(cleaned)

# optionally separate touching nuclei using the distance transform + watershed
distance = ndi.distance_transform_edt(cleaned)
peaks = morphology.h_maxima(distance, 0.3)
markers, _ = ndi.label(peaks)
labels = morphology.watershed(-distance, markers, mask=cleaned)

# show results
fig, axes = plt.subplots(1, 3, figsize=(12, 4))
axes[0].imshow(img, cmap='gray')
axes[0].set_title('original')
axes[1].imshow(cleaned, cmap='gray')
axes[1].set_title('binary mask')
axes[2].imshow(labels, cmap='nipy_spectral')
axes[2].set_title('segmented nuclei')
for ax in axes:
    ax.axis('off')
plt.tight_layout()
plt.show()

Then, we run the test multiple times to derive a bar plot of the algorithms used.

determine_algorithm(prompt, sparse_nuclei_image)
../_images/15c421034011144d860a9ab06d40331a50f73f4c3a302fa2e2ff611c21aaaf70.png
determine_algorithm(prompt, dense_nuclei_image)
../_images/c75feeedea829ad7ccdaefaf98720aee6567653336971133cb886a2ea829a428.png

Next, we try the same strategy using a more complex prompt containing a list of rules to guide the VLM.

# Rule-based prompt: names the algorithm to use for sparse and for dense objects.
# Called with the default num_samples, so the result is shown as a plot.
prompt = """You are a bioimage-analysis expert and excellent Python programmer.
First, describe the image in one sentence, then write Python code for segmenting this image according to given rules:
* If an image shows sparse objects such as nuclei, use Otsu-thresholding for segmenting them.
* If an image shows dense, partially overlapping objects such as nuclei, use StarDist.
"""
determine_algorithm(prompt, sparse_nuclei_image)
../_images/d88d03d2535d6ac71328f54330dd2b0097f81d0573dd555247cddf33786a9339.png
determine_algorithm(prompt, dense_nuclei_image)
../_images/c4673e4ad5682a20a82b178adcb8fad8fb6657de20160320bf7fa75bed682c66.png

For completeness, we display some example code generated with the advanced prompt as well.

determine_algorithm(prompt, sparse_nuclei_image, num_samples=1)
python
import numpy as np
import matplotlib.pyplot as plt
from skimage import io, filters, measure, morphology

# load image (replace 'path/to/image.png' with your actual file path if needed)
image = io.imread('your_image.png')  # or use the image array directly if available

# normalize if necessary
if image.max() > 1:
    image = image / image.max()

# apply otsu threshold
threshold = filters.threshold_otsu(image)
binary = image > threshold

# optional: remove small objects
cleaned = morphology.remove_small_objects(binary, min_size=30)

# label objects
label_img = measure.label(cleaned)

# display results
fig, ax = plt.subplots(1, 3, figsize=(12, 4))
ax[0].imshow(image, cmap='gray')
ax[0].set_title('original')
ax[1].imshow(cleaned, cmap='gray')
ax[1].set_title('otsu threshold')
ax[2].imshow(label_img, cmap='nipy_spectral')
ax[2].set_title('labeled objects')
for a in ax: a.axis('off')
plt.tight_layout()
plt.show()
determine_algorithm(prompt, dense_nuclei_image, num_samples=1)
python
import matplotlib.pyplot as plt
from skimage.io import imread
from stardist.models import stardist2d
from csbdeep.utils import normalize

# load image (assuming the file is 'nuclei.png')
img = imread("nuclei.png")

# normalize image for stardist
img_norm = normalize(img, 1, 99.8, axis=(0,1))

# load pre-trained stardist model for 2d fluorescent nuclei
model = stardist2d.from_pretrained('2d_fluro_nuclei')

# perform prediction
labels, _ = model.predict_instances(img_norm)

# display results
plt.figure(figsize=(8,4))
plt.subplot(1,2,1)
plt.title('original')
plt.imshow(img, cmap='gray')
plt.axis('off')

plt.subplot(1,2,2)
plt.title('stardist segmentation')
plt.imshow(labels, cmap='jet')
plt.axis('off')
plt.show()