OpenAI Embeddings

For completeness, this is how to generate text embeddings using the OpenAI API.

import openai
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
def embed_openai(text, model="text-embedding-3-small"):
    """Return the OpenAI embedding for ``text``.

    Sends one request to the OpenAI embeddings endpoint and returns the
    embedding of the first entry in the response.

    Args:
        text: The text to embed; passed unchanged as the ``input`` of the
            request.
        model: Name of the embedding model to request. Defaults to
            ``"text-embedding-3-small"``, the value that used to be
            hard-coded, so existing one-argument calls behave as before.

    Returns:
        The ``embedding`` attribute of the first entry in the response's
        ``data`` list.
    """
    # Local import keeps the function self-contained.
    from openai import OpenAI

    # The client is built without arguments, so it relies on its default
    # configuration.
    # NOTE(review): presumably the API key is read from the OPENAI_API_KEY
    # environment variable -- confirm against the openai library docs.
    client = OpenAI()

    response = client.embeddings.create(input=text, model=model)

    # Only the first entry is returned; any further entries in
    # ``response.data`` are ignored.
    return response.data[0].embedding
# Words whose embeddings are compared in the plot
words = ["microscope", "cat", "fur", "black", "white"]

# Map every word to its embedding (one embed_openai call per word)
object_coords = dict(zip(words, map(embed_openai, words)))

# Separate the labels from the vectors; rows of the matrix follow `names`
names = list(object_coords)
data_matrix = np.array([object_coords[label] for label in names])

# Project the embeddings onto two principal components for plotting
pca = PCA(n_components=2)
transformed_data = pca.fit_transform(data_matrix)

# Draw the projected points: first component on x, second on y
plt.figure(figsize=(3, 3))
plt.scatter(*transformed_data.T)

# Put each word next to its point
for i, name in enumerate(names):
    plt.annotate(name, tuple(transformed_data[i]))

# Title and axis labels
plt.title('PCA of text embedding')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.show()
![PCA of text embedding](../_images/a946504d415b9ef0e3a77738d7732c21e2fd0e0349588f92d5958104c0763e08.png)