import pandas as pd
from PIL import Image
import numpy as np
import tifffile as tiff
import os
import glob
from tqdm import tqdm
from cmcrameri import cm
# Load the image metadata CSV into a DataFrame; later cells filter this table
metadata_path = '/Users/laura/projects/Bio-image_analysis_school_ScadsAI/data/BBBC021_v1_image.csv'
metadata_df = pd.read_csv(metadata_path)
# Bare expression so the notebook renders the table as the cell output
metadata_df
TableNumber ImageNumber Image_FileName_DAPI Image_PathName_DAPI Image_FileName_Tubulin Image_PathName_Tubulin Image_FileName_Actin Image_PathName_Actin Image_Metadata_Plate_DAPI Image_Metadata_Well_DAPI Replicate Image_Metadata_Compound Image_Metadata_Concentration
0 4 233 G10_s1_w1BEDC2073-A983-4B98-95E9-84466707A25D.tif Week4/Week4_27481 G10_s1_w2DCEC82F3-05F7-4F2F-B779-C5DF9698141E.tif Week4/Week4_27481 G10_s1_w43CD51CBC-2370-471F-BA01-EE250B14B3C8.tif Week4/Week4_27481 Week4_27481 G10 1 5-fluorouracil 0.003
1 4 234 G10_s2_w11C3B9BCC-E48F-4C2F-9D31-8F46D8B5B972.tif Week4/Week4_27481 G10_s2_w2570437EF-C8DC-4074-8D63-7FA3A7271FEE.tif Week4/Week4_27481 G10_s2_w400B21F33-BDAB-4363-92C2-F4FB7545F08C.tif Week4/Week4_27481 Week4_27481 G10 1 5-fluorouracil 0.003
2 4 235 G10_s3_w1F4FCE330-C71C-4CA3-9815-EAF9B9876EB5.tif Week4/Week4_27481 G10_s3_w2194A9AC7-369B-4D84-99C0-DA809B0042B8.tif Week4/Week4_27481 G10_s3_w4E0452054-9FC1-41AB-8C5B-D0ACD058991F.tif Week4/Week4_27481 Week4_27481 G10 1 5-fluorouracil 0.003
3 4 236 G10_s4_w1747818B4-FFA7-40EE-B0A0-6A5974AF2644.tif Week4/Week4_27481 G10_s4_w298D4652F-B5BF-49F2-BE51-8149DF83EAFD.tif Week4/Week4_27481 G10_s4_w42648D36D-6B77-41CD-B520-6E4C533D9ABC.tif Week4/Week4_27481 Week4_27481 G10 1 5-fluorouracil 0.003
4 4 473 G10_s1_w10034568D-CC12-43C3-93A9-DC3782099DD3.tif Week4/Week4_27521 G10_s1_w2A29ED14B-952C-4BA1-89B9-4F92B6DADEB4.tif Week4/Week4_27521 G10_s1_w4DAA2E9D1-F6E9-45FA-ADC0-D341B647A680.tif Week4/Week4_27521 Week4_27521 G10 2 5-fluorouracil 0.003
... ... ... ... ... ... ... ... ... ... ... ... ... ...
13195 9 3560 Week9_090907_F11_s4_w19580FF4D-DC3D-4BD0-93FE-... Week9/Week9_39301 Week9_090907_F11_s4_w2DC65EC6F-BDCA-4B05-B243-... Week9/Week9_39301 Week9_090907_F11_s4_w45699A0F4-9AEE-4CD4-8973-... Week9/Week9_39301 Week9_39301 F11 3 DMSO 0.000
13196 9 3597 Week9_090907_G11_s1_w1EDE534D2-FCEE-4F92-A30B-... Week9/Week9_39301 Week9_090907_G11_s1_w26A22E27F-6A81-43F5-9587-... Week9/Week9_39301 Week9_090907_G11_s1_w4554A2BF7-0D53-4D27-BF92-... Week9/Week9_39301 Week9_39301 G11 3 DMSO 0.000
13197 9 3598 Week9_090907_G11_s2_w10B010F39-3B4B-4DCB-8E34-... Week9/Week9_39301 Week9_090907_G11_s2_w2720AC778-3F85-4293-8D75-... Week9/Week9_39301 Week9_090907_G11_s2_w49B290958-BCF2-4DDD-B0E9-... Week9/Week9_39301 Week9_39301 G11 3 DMSO 0.000
13198 9 3599 Week9_090907_G11_s3_w10394282C-6D3D-4E0E-9FA3-... Week9/Week9_39301 Week9_090907_G11_s3_w24C59DB62-E99B-4284-BAD2-... Week9/Week9_39301 Week9_090907_G11_s3_w471FE25C8-2477-456F-9D74-... Week9/Week9_39301 Week9_39301 G11 3 DMSO 0.000
13199 9 3600 Week9_090907_G11_s4_w1C447A151-1F85-4E19-9C96-... Week9/Week9_39301 Week9_090907_G11_s4_w22E574F48-321D-4470-ACC4-... Week9/Week9_39301 Week9_090907_G11_s4_w4200C5003-7F75-47DF-928C-... Week9/Week9_39301 Week9_39301 G11 3 DMSO 0.000

13200 rows × 13 columns

# Restrict the dataset to a small, consistent subset:
#   * Week 1 plates only (DAPI path contains "Week1_"),
#   * imaging site 1 only (DAPI filename contains the "_s1_" token),
#   * replicate 1 only.
# regex=False matches the needles literally instead of as regular expressions,
# the underscore-delimited "_s1_" cannot match a longer site token such as
# "s10", and na=False treats missing names as "no match" so the boolean mask
# stays valid for indexing.
is_week1 = metadata_df['Image_PathName_DAPI'].str.contains('Week1_', regex=False, na=False)
is_site1 = metadata_df['Image_FileName_DAPI'].str.contains('_s1_', regex=False, na=False)
is_replicate1 = metadata_df['Replicate'] == 1
# A single combined mask keeps the original row order and index labels
filtered_metadata = metadata_df[is_week1 & is_site1 & is_replicate1]
filtered_metadata
TableNumber ImageNumber Image_FileName_DAPI Image_PathName_DAPI Image_FileName_Tubulin Image_PathName_Tubulin Image_FileName_Actin Image_PathName_Actin Image_Metadata_Plate_DAPI Image_Metadata_Well_DAPI Replicate Image_Metadata_Compound Image_Metadata_Concentration
672 1 73 Week1_150607_C10_s1_w171173D63-FDAC-457A-9E33-... Week1/Week1_22123 Week1_150607_C10_s1_w29DCB015C-38C2-41D2-A798-... Week1/Week1_22123 Week1_150607_C10_s1_w48B202CFA-2040-4647-8F8B-... Week1/Week1_22123 Week1_22123 C10 1 aphidicolin 0.003
684 1 69 Week1_150607_C09_s1_w1CC450920-31F4-45D3-B500-... Week1/Week1_22123 Week1_150607_C09_s1_w27BEDAEBE-0D98-4D1A-A6DD-... Week1/Week1_22123 Week1_150607_C09_s1_w4190BB82C-2D17-4263-A251-... Week1/Week1_22123 Week1_22123 C09 1 aphidicolin 0.010
696 1 65 Week1_150607_C08_s1_w1F53B52B0-CE67-45C2-9A69-... Week1/Week1_22123 Week1_150607_C08_s1_w25D455F3B-E5DB-44A9-85CF-... Week1/Week1_22123 Week1_150607_C08_s1_w433CA7503-C3C0-4202-9E29-... Week1/Week1_22123 Week1_22123 C08 1 aphidicolin 0.030
708 1 61 Week1_150607_C07_s1_w1C8C66DFB-08F8-4AAA-BB23-... Week1/Week1_22123 Week1_150607_C07_s1_w2E533E75A-2247-476A-B195-... Week1/Week1_22123 Week1_150607_C07_s1_w46D54AA9A-CB2C-4F47-BD09-... Week1/Week1_22123 Week1_22123 C07 1 aphidicolin 0.100
720 1 57 Week1_150607_C06_s1_w10E977263-BE9D-4ED2-9931-... Week1/Week1_22123 Week1_150607_C06_s1_w20B01C3F8-1CA8-442E-8965-... Week1/Week1_22123 Week1_150607_C06_s1_w4B1ED3D57-0D79-49E4-94B7-... Week1/Week1_22123 Week1_22123 C06 1 aphidicolin 0.300
... ... ... ... ... ... ... ... ... ... ... ... ... ...
12028 1 2921 Week1_150607_C02_s1_w1ABFACD53-F9A2-4139-8EB1-... Week1/Week1_22361 Week1_150607_C02_s1_w29414A130-7191-4B7E-B61A-... Week1/Week1_22361 Week1_150607_C02_s1_w431C347AD-6596-426A-B8EF-... Week1/Week1_22361 Week1_22361 C02 1 DMSO 0.000
12032 1 2961 Week1_150607_D02_s1_w105AB7990-5B18-4F15-A679-... Week1/Week1_22361 Week1_150607_D02_s1_w297C73E52-531B-4406-834E-... Week1/Week1_22361 Week1_150607_D02_s1_w4CA0A136A-6645-4DA7-853B-... Week1/Week1_22361 Week1_22361 D02 1 DMSO 0.000
12036 1 3037 Week1_150607_E11_s1_w1547D9388-86B4-4D2A-85C4-... Week1/Week1_22361 Week1_150607_E11_s1_w2067628A9-A6E6-4912-A8F8-... Week1/Week1_22361 Week1_150607_E11_s1_w45381CCE8-6387-465D-B9E0-... Week1/Week1_22361 Week1_22361 E11 1 DMSO 0.000
12040 1 3077 Week1_150607_F11_s1_w15D6B81BE-146D-4A68-84C7-... Week1/Week1_22361 Week1_150607_F11_s1_w25A53438C-F6B9-4AF3-B6CD-... Week1/Week1_22361 Week1_150607_F11_s1_w40C9E2448-B597-4452-9C48-... Week1/Week1_22361 Week1_22361 F11 1 DMSO 0.000
12044 1 3117 Week1_150607_G11_s1_w1177CE3C6-E958-4783-9D89-... Week1/Week1_22361 Week1_150607_G11_s1_w295E4DA49-E19D-4C81-AC76-... Week1/Week1_22361 Week1_150607_G11_s1_w41FCBABBC-ECF0-4900-A443-... Week1/Week1_22361 Week1_22361 G11 1 DMSO 0.000

120 rows × 13 columns

# Summarise which compounds survived the filtering (computed once, reported twice)
available_compounds = filtered_metadata['Image_Metadata_Compound'].unique()
print(f"Available compounds: {available_compounds}")
print(f"Number of available compounds: {len(available_compounds)}")
Available compounds: ['aphidicolin' 'colchicine' 'cytochalasin B' 'doxorubicin' 'epothilone B'
 'latrunculin B' 'monastrol' 'nocodazole' 'taxol' 'AZ-A' 'AZ-H' 'AZ-I'
 'DMSO']
Number of available compounds: 13

Create a multichannel timelapse (DNA [DAPI], F-actin, and β-tubulin) for each of these compounds, where each timepoint is a different concentration.

# Build a pseudo-timelapse per compound: one frame for every distinct
# concentration, ordered from lowest to highest concentration.
compound_groups = filtered_metadata.groupby('Image_Metadata_Compound')

# Frame key -> metadata column holding that channel's image file name
channel_columns = {
    'DAPI': 'Image_FileName_DAPI',
    'Tubulin': 'Image_FileName_Tubulin',
    'Actin': 'Image_FileName_Actin',
}

# compound name -> list of frame dicts (channel file names + concentration)
timelapse_frames = {}

for compound, group in compound_groups:
    doses = group['Image_Metadata_Concentration']
    frames = []
    for dose in sorted(doses.unique()):
        # Several rows can share a concentration; keep only the first one
        first_row = group[doses == dose].iloc[0]
        frame = {channel: first_row[column] for channel, column in channel_columns.items()}
        frame['Concentration'] = dose
        frames.append(frame)
    timelapse_frames[compound] = frames
timelapse_frames.keys()
dict_keys(['AZ-A', 'AZ-H', 'AZ-I', 'DMSO', 'aphidicolin', 'colchicine', 'cytochalasin B', 'doxorubicin', 'epothilone B', 'latrunculin B', 'monastrol', 'nocodazole', 'taxol'])
# Display the frames selected for one compound as a sanity check (notebook cell output)
timelapse_frames['nocodazole']
[{'DAPI': 'Week1_150607_E10_s1_w1CA7826FB-76BC-496D-9AD3-93BD9CDD6191.tif',
  'Tubulin': 'Week1_150607_E10_s1_w225625006-6AF8-45F2-A1D8-74B8F751F88F.tif',
  'Actin': 'Week1_150607_E10_s1_w4288C0B45-A218-462A-ADF9-9981999D71DB.tif',
  'Concentration': 0.001},
 {'DAPI': 'Week1_150607_E09_s1_w1B5C7A17F-3BAB-4CBC-AC5F-330FDA00BA72.tif',
  'Tubulin': 'Week1_150607_E09_s1_w2D013258C-70C2-4A4C-8E6D-88DA824429D1.tif',
  'Actin': 'Week1_150607_E09_s1_w4B88F72FD-5E35-4179-BF5C-FE59EC7C7B33.tif',
  'Concentration': 0.003},
 {'DAPI': 'Week1_150607_E08_s1_w1CEEA4B32-8E5E-4A7E-96B4-7DBA712707EE.tif',
  'Tubulin': 'Week1_150607_E08_s1_w216AE8CA6-1EE1-4051-9BEB-FEC620076A83.tif',
  'Actin': 'Week1_150607_E08_s1_w4FB151688-AE38-4413-9B96-CF3F90C1046B.tif',
  'Concentration': 0.01},
 {'DAPI': 'Week1_150607_E07_s1_w13C94606D-7D62-433B-9FC8-6866ED66FE56.tif',
  'Tubulin': 'Week1_150607_E07_s1_w28793CA02-5665-40C7-AD77-B506FFD23023.tif',
  'Actin': 'Week1_150607_E07_s1_w4DBE94139-AD8E-4552-9A3F-B3C4890EDF70.tif',
  'Concentration': 0.03},
 {'DAPI': 'Week1_150607_E06_s1_w12A799099-5B5E-4113-B5BE-7C33EF966E74.tif',
  'Tubulin': 'Week1_150607_E06_s1_w2450A3D76-BED1-441E-92F3-3F333AF2566E.tif',
  'Actin': 'Week1_150607_E06_s1_w44DF425B3-7A78-4C61-8A63-1FC1936EC795.tif',
  'Concentration': 0.1},
 {'DAPI': 'Week1_150607_E05_s1_w1FB128A45-74C9-4CC3-8E67-021B9FD15CE2.tif',
  'Tubulin': 'Week1_150607_E05_s1_w25638D64A-4A43-43C2-A060-5136F0676AE6.tif',
  'Actin': 'Week1_150607_E05_s1_w481DC059D-76AC-449F-9F98-1928951CC7B3.tif',
  'Concentration': 0.3},
 {'DAPI': 'Week1_150607_E04_s1_w171BF0848-4027-44D3-B6B8-863C8523F226.tif',
  'Tubulin': 'Week1_150607_E04_s1_w2C709B861-E4A8-499D-B575-33072064F217.tif',
  'Actin': 'Week1_150607_E04_s1_w479EC1B1E-86DB-4761-886C-1F96AC095E30.tif',
  'Concentration': 1.0},
 {'DAPI': 'Week1_150607_E03_s1_w1CC9CF6DE-5D49-485C-B53A-C5F23960F132.tif',
  'Tubulin': 'Week1_150607_E03_s1_w2104E0137-752D-45DB-ACA6-2438B95BD17D.tif',
  'Actin': 'Week1_150607_E03_s1_w44CBCF5A0-2A59-4426-925F-5E5673C685FF.tif',
  'Concentration': 3.0}]
# Root directory of the raw images; open_image() searches its direct subdirectories
data_path = "/Users/laura/projects/Bio-image_analysis_school_ScadsAI/data"
# Directory that receives the per-compound timelapse TIFF files
save_path = "/Users/laura/projects/Bio-image_analysis_school_ScadsAI/prepared_dataset"

def open_image(file_name):
    """ Open the image named ``file_name`` from a direct subdirectory of ``data_path``.

    The file is looked up as ``<data_path>/<any directory>/<file_name>``. If
    several subdirectories contain a file with that name, the match that
    sorts first is opened so the choice is reproducible between runs.

    Returns the object produced by ``Image.open``; closing it is left to the
    caller. Raises ``FileNotFoundError`` when no subdirectory holds the file.
    """
    # Escape glob metacharacters in the fixed parts so that only the '*'
    # directory wildcard is interpreted as a pattern
    search_pattern = os.path.join(glob.escape(data_path), '*', glob.escape(file_name))
    # glob returns matches in arbitrary order; sort for a deterministic pick
    file_list = sorted(glob.glob(search_pattern))

    if not file_list:
        raise FileNotFoundError(f"File {file_name} not found in any subdirectory under {data_path}.")

    return Image.open(file_list[0])

def normalize_image(image):
    """ Normalize the image data to [0, 1].

    The minimum value maps to 0 and the maximum to 1. Floating-point input
    keeps its dtype; any other dtype is converted to float64 first so the
    value range cannot overflow a narrow integer type.

    A constant image (maximum equal to minimum) has no range to stretch and
    is returned as all zeros instead of dividing by zero.
    """
    data = np.asarray(image)
    if not np.issubdtype(data.dtype, np.floating):
        data = data.astype(np.float64)

    image_min = data.min()
    value_range = data.max() - image_min
    if value_range == 0:
        # Flat image: avoid 0/0, which would fill the result with NaN
        return np.zeros_like(data)
    return (data - image_min) / value_range


# Write one RGB timelapse TIFF per compound: every frame is one concentration,
# with the three normalized channels stacked along the last axis.

# Make sure the output directory exists before the first file is written
os.makedirs(save_path, exist_ok=True)

for compound, frames in tqdm(timelapse_frames.items()):
    multichannel_images = []

    for frame in frames:
        # Load the DAPI, Tubulin, and Actin channels. The context managers
        # close each source file once its pixels have been copied into arrays.
        with open_image(frame['DAPI']) as dapi_image, \
                open_image(frame['Tubulin']) as tubulin_image, \
                open_image(frame['Actin']) as actin_image:
            dapi_array = normalize_image(np.array(dapi_image))
            tubulin_array = normalize_image(np.array(tubulin_image))
            actin_array = normalize_image(np.array(actin_image))

        # Stack arrays along a new last dimension to create a multichannel image
        # Order of channels in the stack can be adjusted based on visualization preferences
        multichannel_array = np.stack([tubulin_array, actin_array, dapi_array], axis=-1)

        multichannel_images.append(multichannel_array)

    # Save the sequence of multichannel images as a TIFF stack (timelapse)
    timelapse_filename = f"{compound}_timelapse.tif"
    timelapse_path = os.path.join(save_path, timelapse_filename)
    # np.stack adds the leading frame axis and fails loudly if frame shapes differ
    tiff.imwrite(timelapse_path, np.stack(multichannel_images), photometric='rgb')

    # tqdm.write prints the message without breaking up the progress bar
    tqdm.write(f"Saved timelapse for {compound} to {timelapse_path}.")
  8%|██████████▍                                                                                                                             | 1/13 [00:00<00:06,  1.72it/s]
Saved timelapse for AZ-A to /Users/laura/projects/Bio-image_analysis_school_ScadsAI/prepared_dataset/AZ-A_timelapse.tif.
 15%|████████████████████▉                                                                                                                   | 2/13 [00:01<00:06,  1.80it/s]
Saved timelapse for AZ-H to /Users/laura/projects/Bio-image_analysis_school_ScadsAI/prepared_dataset/AZ-H_timelapse.tif.
 31%|█████████████████████████████████████████▊                                                                                              | 4/13 [00:02<00:04,  1.93it/s]
Saved timelapse for AZ-I to /Users/laura/projects/Bio-image_analysis_school_ScadsAI/prepared_dataset/AZ-I_timelapse.tif.
Saved timelapse for DMSO to /Users/laura/projects/Bio-image_analysis_school_ScadsAI/prepared_dataset/DMSO_timelapse.tif.
 38%|████████████████████████████████████████████████████▎                                                                                   | 5/13 [00:02<00:03,  2.03it/s]
Saved timelapse for aphidicolin to /Users/laura/projects/Bio-image_analysis_school_ScadsAI/prepared_dataset/aphidicolin_timelapse.tif.
 46%|██████████████████████████████████████████████████████████████▊                                                                         | 6/13 [00:03<00:03,  2.00it/s]
Saved timelapse for colchicine to /Users/laura/projects/Bio-image_analysis_school_ScadsAI/prepared_dataset/colchicine_timelapse.tif.
 54%|█████████████████████████████████████████████████████████████████████████▏                                                              | 7/13 [00:03<00:03,  1.91it/s]
Saved timelapse for cytochalasin B to /Users/laura/projects/Bio-image_analysis_school_ScadsAI/prepared_dataset/cytochalasin B_timelapse.tif.
 62%|███████████████████████████████████████████████████████████████████████████████████▋                                                    | 8/13 [00:04<00:02,  1.74it/s]
Saved timelapse for doxorubicin to /Users/laura/projects/Bio-image_analysis_school_ScadsAI/prepared_dataset/doxorubicin_timelapse.tif.
 69%|██████████████████████████████████████████████████████████████████████████████████████████████▏                                         | 9/13 [00:04<00:02,  1.81it/s]
Saved timelapse for epothilone B to /Users/laura/projects/Bio-image_analysis_school_ScadsAI/prepared_dataset/epothilone B_timelapse.tif.
 77%|███████████████████████████████████████████████████████████████████████████████████████████████████████▊                               | 10/13 [00:05<00:01,  1.84it/s]
Saved timelapse for latrunculin B to /Users/laura/projects/Bio-image_analysis_school_ScadsAI/prepared_dataset/latrunculin B_timelapse.tif.
 85%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                    | 11/13 [00:06<00:01,  1.70it/s]
Saved timelapse for monastrol to /Users/laura/projects/Bio-image_analysis_school_ScadsAI/prepared_dataset/monastrol_timelapse.tif.
 92%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌          | 12/13 [00:06<00:00,  1.64it/s]
Saved timelapse for nocodazole to /Users/laura/projects/Bio-image_analysis_school_ScadsAI/prepared_dataset/nocodazole_timelapse.tif.
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:07<00:00,  1.75it/s]
Saved timelapse for taxol to /Users/laura/projects/Bio-image_analysis_school_ScadsAI/prepared_dataset/taxol_timelapse.tif.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.manifold import TSNE

# Step 1: Generate a sample dataset of Gaussian blobs with known cluster labels
n_samples = 500
n_features = 10
n_clusters = 5

X, y = make_blobs(n_samples=n_samples, n_features=n_features, centers=n_clusters, random_state=42)

# Step 2: Apply t-SNE with different perplexity values
perplexities = [2, 5, 15]
tsne_results = {}

for perplexity in perplexities:
    tsne = TSNE(n_components=2, perplexity=perplexity, random_state=42)
    tsne_results[perplexity] = tsne.fit_transform(X)

# Step 3: Plot the results, one panel per perplexity value.
# The panel count follows the list above; squeeze=False always returns a 2-D
# array of axes, so the loop also works for a single perplexity.
fig, axes = plt.subplots(1, len(perplexities), figsize=(10, 5), squeeze=False)

for ax, perplexity in zip(axes[0], perplexities):
    embedding = tsne_results[perplexity]
    ax.scatter(embedding[:, 0], embedding[:, 1], c=y, cmap=cm.batlow)
    ax.set_title(f'Perplexity = {perplexity}', fontsize=26)
    # Hides the frame, ticks and tick labels in one call
    ax.axis('off')

plt.tight_layout()
plt.savefig("/Users/laura/Downloads/perplexity.png")
plt.show()
../_images/9fe815184883f09fd300a88b8cc2f62af5addfdc18485fe1e168a0e196b2447c.png
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.manifold import TSNE
import numpy as np

# Load the MNIST dataset as pandas objects (the .iloc indexing below needs them)
mnist = fetch_openml('mnist_784', version=1, as_frame=True)

# Select the rows to embed. num_samples is clamped to the dataset size so
# sampling without replacement cannot fail, and a seeded generator makes the
# row order (and therefore the embedding) reproducible between runs.
num_samples = min(70000, len(mnist.data))
rng = np.random.default_rng(42)
indices = rng.choice(len(mnist.data), num_samples, replace=False)
X_subset = mnist.data.iloc[indices].values
# Cast the targets to integers so they can be used as numeric colour values
y_subset = mnist.target.iloc[indices].astype(int)

# Apply t-SNE to the subset
tsne = TSNE(n_components=2, perplexity=50, random_state=42)
X_tsne = tsne.fit_transform(X_subset)

# Plot the t-SNE results, coloured by digit
plt.figure(figsize=(10, 8))
scatter = plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=y_subset, cmap='tab10', s=10)
plt.colorbar(scatter)
plt.title('t-SNE visualization of MNIST dataset')
plt.xlabel('t-SNE 1')
plt.ylabel('t-SNE 2')
plt.show()
../_images/6dd70966b51c490a2710e18d7705efe933682be4cb9818edffb95e3b1af41378.png
# Re-plot the t-SNE embedding with the batlow colormap and a per-digit legend
plt.figure(figsize=(10, 8))
scatter = plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=y_subset, cmap=cm.batlow, s=8, alpha=0.7)
plt.colorbar(scatter)
plt.title('t-SNE visualization of MNIST dataset')
plt.xlabel('t-SNE 1')
plt.ylabel('t-SNE 2')
plt.axis('off')

# Add one empty, labelled scatter per digit so every digit gets a legend entry.
# `color=` is used instead of `c=` because an RGBA tuple passed as `c` is
# ambiguous (matplotlib warns about it). to_rgba() maps each label through the
# main scatter's own normalization rather than assuming labels span 0..9.
unique_labels = np.unique(y_subset)
for i in unique_labels:
    plt.scatter([], [], color=scatter.to_rgba(i), label=str(i))
plt.legend(title="Digits")

# NOTE(review): this only clears the attribute on the scatter collection;
# presumably the intent was to hide the colorbar - confirm whether the
# colorbar should actually be removed from the figure.
plt.gca().collections[0].colorbar = None

plt.savefig("/Users/laura/Downloads/mnist.png")
plt.show()
/var/folders/gn/h45356j57bs_gzpwvw6kkbn40000gn/T/ipykernel_91923/275149631.py:12: UserWarning: *c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with *x* & *y*.  Please use the *color* keyword-argument or provide a 2D array with a single row if you intend to specify the same RGB or RGBA value for all points.
  plt.scatter([], [], c=scatter.cmap(i / 9), label=str(i))
../_images/32b1ab04fdb79f78e5e7bccedce86f13ad5e9c7bacf32ed6d58c309bb6c88f64.png
from sklearn.decomposition import PCA

# Apply PCA to the subset
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_subset)

# Plot the PCA results with a legend
plt.figure(figsize=(10, 8))

# One scatter call coloured by digit. 'tab10' provides ten distinct colours;
# the previous 'Set1' map only has nine, so two digits shared a colour.
scatter = plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y_subset, s=10, alpha=0.6, cmap='tab10')

plt.title('PCA visualization of MNIST dataset', fontsize=18)
plt.xlabel('PCA 1')
plt.ylabel('PCA 2')
plt.axis('off')

# Add one empty, labelled scatter per digit so every digit gets a legend entry.
# `color=` avoids matplotlib's warning about an RGBA tuple passed as `c`, and
# to_rgba() reuses the main scatter's normalization for the colour lookup.
unique_labels = np.unique(y_subset)
for i in unique_labels:
    plt.scatter([], [], color=scatter.to_rgba(i), label=str(i))
# Anchor the legend just outside the right edge of the axes
plt.legend(title="Digits", loc='center left', bbox_to_anchor=(1, 0.5), fontsize=18)

#plt.savefig("/Users/laura/Downloads/mnist_pca.png", bbox_inches='tight', dpi=300)
plt.show()
/var/folders/gn/h45356j57bs_gzpwvw6kkbn40000gn/T/ipykernel_91923/353009687.py:22: UserWarning: *c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with *x* & *y*.  Please use the *color* keyword-argument or provide a 2D array with a single row if you intend to specify the same RGB or RGBA value for all points.
  plt.scatter([], [], c=scatter.cmap(i / 9), label=str(i))
../_images/f3838edf523156906e44c130b317ced997bf34c24f394dbef7bd59945e847730.png