Ollama Embeddings#

Ollama can also serve embedding models locally. Before executing the following code, you need to download the embedding model once:

ollama pull embeddinggemma

Also, depending on how you installed Ollama, you may have to start the server in a separate terminal window before running this notebook:

ollama serve

As you will see, we access the local embedding models served by Ollama through the OpenAI API, just as in the previous examples. We only change the base_url and do not need to provide an API key.

import openai
openai.__version__
'1.41.0'
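
Before creating embeddings, you can optionally check that the local server is reachable. The following is a minimal sketch that points the OpenAI client at Ollama's default address and lists the locally installed models; it assumes Ollama's OpenAI-compatible endpoint exposes the model list on the default port 11434, and the placeholder API key is only there because the client requires a non-empty value.

import openai

# point the OpenAI client at the local Ollama server (default port 11434 assumed)
client = openai.OpenAI(
    base_url="http://localhost:11434/v1",
    api_key="none",  # placeholder, no real key is required for local Ollama
)

# list the locally installed models; "embeddinggemma" should appear after pulling it
for model in client.models.list():
    print(model.id)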

Creating Embeddings with Ollama#

We define a helper function to generate text embeddings using the local Ollama endpoint. The function connects to the local Ollama server and uses the “embeddinggemma” model to create vector representations of text.

def embed_ollama(text, model="embeddinggemma"):
    """A helper function that generates embeddings using ollama and returns the embedding vector."""
    
    # set up the connection to the local Ollama server; no real API key is needed,
    # but the client requires a non-empty value
    client = openai.OpenAI(
        base_url="http://localhost:11434/v1",
        api_key="none",
    )
    
    # create the embedding
    response = client.embeddings.create(
        input=text,
        model=model
    )
    
    # extract and return the embedding vector
    return response.data[0].embedding

Let’s test the embedding function with a simple example:

# Test with a simple text
test_text = "Hello, this is a test sentence for embeddings."
embedding = embed_ollama(test_text)

print(f"Text: {test_text}")
print(f"Embedding dimension: {len(embedding)}")
print(f"First 5 values: {embedding[:5]}")
Text: Hello, this is a test sentence for embeddings.
Embedding dimension: 768
First 5 values: [-0.16048418, -0.002961286, 0.014041578, -0.029707532, -0.009763586]
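
The helper above sends one request per text. If you need to embed many texts, the OpenAI embeddings API also accepts a list of strings in a single request; whether Ollama's OpenAI-compatible endpoint handles list input the same way is an assumption you should verify locally. A minimal sketch of such a batch variant (the function name embed_ollama_batch is our own):

def embed_ollama_batch(texts, model="embeddinggemma"):
    """Sketch of a batch variant: embed a list of texts in a single request."""
    client = openai.OpenAI(
        base_url="http://localhost:11434/v1",
        api_key="none",  # placeholder, no real key needed for local Ollama
    )
    response = client.embeddings.create(input=texts, model=model)
    # one embedding per input text, returned in the same order
    return [item.embedding for item in response.data]

# Example usage (assumes the local endpoint accepts list input)
vectors = embed_ollama_batch(["first sentence", "second sentence"])
print(len(vectors), len(vectors[0]))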

Working with Multiple Texts#

Let’s generate embeddings for multiple texts and compare them:

# Define some sample texts
texts = [
    "The cat sat on the mat.",
    "A feline rested on the carpet.",
    "The dog ran in the park.",
    "Machine learning is fascinating.",
    "Artificial intelligence transforms technology."
]

# Generate embeddings for all texts
embeddings = {}
for i, text in enumerate(texts):
    embeddings[f"text_{i+1}"] = embed_ollama(text)
    print(f"Generated embedding for text {i+1}: {text[:30]}...")

print(f"\nGenerated {len(embeddings)} embeddings successfully!")
Generated embedding for text 1: The cat sat on the mat....
Generated embedding for text 2: A feline rested on the carpet....
Generated embedding for text 3: The dog ran in the park....
Generated embedding for text 4: Machine learning is fascinatin...
Generated embedding for text 5: Artificial intelligence transf...

Generated 5 embeddings successfully!
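
Before visualizing the embeddings, it is instructive to compare them directly. The following sketch computes the pairwise cosine similarities between the five texts using the embeddings dictionary from above; with a well-behaved embedding model, the two sentences about a cat resting should be more similar to each other than to the remaining texts, but the exact values depend on the model.

import numpy as np

# stack the embeddings from the dictionary into a matrix (one row per text)
matrix = np.array(list(embeddings.values()))

# normalize the rows, then the dot products are cosine similarities
normalized = matrix / np.linalg.norm(matrix, axis=1, keepdims=True)
similarity_matrix = normalized @ normalized.T

# print the upper triangle of the similarity matrix
for i in range(len(texts)):
    for j in range(i + 1, len(texts)):
        print(f"text {i+1} vs text {j+1}: {similarity_matrix[i, j]:.3f}")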

Visualizing Embeddings#

Let’s use PCA to reduce the dimensionality and visualize the embeddings in 2D space:

import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA

# Apply PCA to reduce to 2 dimensions
pca = PCA(n_components=2)
embeddings_2d = pca.fit_transform(list(embeddings.values()))

# Create scatter plot
plt.figure(figsize=(10, 8))
plt.scatter(embeddings_2d[:, 0], embeddings_2d[:, 1], s=100, alpha=0.7)

# Add text labels for each point
for i, text in enumerate(texts):
    plt.annotate(f"{i+1}: {text[:25]}...", 
                (embeddings_2d[i, 0], embeddings_2d[i, 1]),
                xytext=(5, 5), textcoords='offset points',
                fontsize=9, alpha=0.8)

plt.title('Text Embeddings Visualization (PCA Projection)', fontsize=14)
plt.xlabel(f'Principal Component 1 (explained variance: {pca.explained_variance_ratio_[0]:.3f})')
plt.ylabel(f'Principal Component 2 (explained variance: {pca.explained_variance_ratio_[1]:.3f})')
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

print(f"Total explained variance: {sum(pca.explained_variance_ratio_):.3f}")

Semantic Search Example#

We can use embeddings for semantic search, i.e. finding the texts that are most similar to a given query:

def semantic_search(query, texts, top_k=3):
    """Find the most similar texts to a query using embeddings."""
    
    # Get embedding for the query
    query_embedding = embed_ollama(query)
    
    # Get embeddings for all texts
    text_embeddings = [embed_ollama(text) for text in texts]
    
    # Calculate similarities
    similarities = []
    for text_emb in text_embeddings:
        # Cosine similarity between query and text embeddings
        similarity = np.dot(query_embedding, text_emb) / (
            np.linalg.norm(query_embedding) * np.linalg.norm(text_emb)
        )
        similarities.append(similarity)
    
    # Get top-k most similar texts
    indexed_similarities = [(i, sim) for i, sim in enumerate(similarities)]
    indexed_similarities.sort(key=lambda x: x[1], reverse=True)
    
    return indexed_similarities[:top_k]

# Example search
query = "animal sitting down"
results = semantic_search(query, texts)

print(f"Query: '{query}'")
print("\nMost similar texts:")
for rank, (idx, similarity) in enumerate(results, 1):
    print(f"{rank}. Text {idx+1} (similarity: {similarity:.3f}): {texts[idx]}")

Exercise#

  1. Try different texts and see how the embeddings cluster in the visualization

  2. Experiment with different queries in the semantic search function

  3. Explore other embedding models available in Ollama by running ollama list in your terminal

  4. Compare the results with different embedding models (if you have others installed)

# Your experiments here