Seismic Event Classification using Deep Learning#
Last Updated: May 6, 2025#
Purpose#
This notebook trains a CNN classifier for a multi-class seismic event classification of the followin event types:
Earthquakes
Explosions
Surface events
Noise
The notebook performs the following tasks:
Prepares input data (waveform and spectrogram).
Trains multiple deep learning models.
Evaluates the models using metrics such as loss, accuracy, confusion matrices, and classification reports.
Adding a few missing dependencies..
!pip install seaborn scikit-learn torch torchvision matplotlib
# Enable autoreload
%load_ext autoreload
%autoreload 2
import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import h5py
# from tqdm import tqdm
from glob import glob
# import time
import random
from tqdm import tqdm
from scipy import stats,signal
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import random_split
import numpy as np
import scipy.signal as signal
# Check if a GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from utils import QuakeXNet_1d,QuakeXNet_2d,SeismicCNN_1d,SeismicCNN_2d
from utils import extract_waveforms
from utils import extract_spectrograms
from utils import PNWDataSet
from utils import plot_model_training
from utils import train_model
from utils import plot_confusion_matrix_and_cr
os.makedirs('plot', exist_ok=True)
Key Parameters#
## seismic data parameters
num_channels = 3 # number of channels
fs = 50 # new sampling rate
highcut = 20 # (Hz) high cut frequency
lowcut = 1 # (Hz) low cut frequency
input_window_length = 100 # (seconds)
start = -20 # (seconds) to offset the start time from the P wave
## Training parameters
train_split = 70
val_split=20
test_split = 10
learning_rate=0.001
batch_size=128
n_epochs=30
dropout=0.4
criterion=nn.CrossEntropyLoss()
Data Download#
downloads metadata for seismic event classes from CSV files:
comcat_metadata: Metadata for earthquake and explosion events.exotic_metadata: Metadata for surface events.noise_metadata: Metadata for noise samples.
The data is stored on a shared storage:
datadir = '/data/horse/ws/s4122485-ai4seismology/data/earthquake_seismology/'
#data files
file_noise = datadir + "noise_waveforms.hdf5";
file_comcat = datadir + "mesoPNW_waveforms.hdf5";
file_su = datadir + "exotic_waveforms.hdf5";
# metadata
# accessing the comcat metadata
metadata_comcat = pd.read_csv(datadir+"mesoPNW_metadata.csv")
metadata_su = pd.read_csv(datadir+"exotic_metadata.csv")
metadata_noise = pd.read_csv(datadir+"noise_metadata.csv")
# concatenate all dataframes
metadata = pd.concat([metadata_comcat, metadata_noise, metadata_su], ignore_index=True)
# only select BH channels
# metadata = metadata[metadata['station_channel_code'].str.contains('BH')]
# creating individual data frames for each class
df_exp = metadata[metadata['source_type'] == 'explosion']
df_eq = metadata[metadata['source_type'] == 'earthquake']
df_su = metadata[metadata['source_type'] == 'surface event']
df_no = metadata[metadata['source_type'] == 'noise']
df_no['event_id'] = [df_no['trace_start_time'].values[i]+'_noise' for i in range(len(df_no))]
number_data_per_class = np.min([len(metadata_comcat[metadata_comcat['source_type'] == 'explosion']), len(metadata_comcat[metadata_comcat['source_type'] == 'earthquake']), len(metadata_su[metadata_su['source_type'] == 'surface event']), len(metadata_noise[metadata_noise['source_type'] == 'noise'])])
print('Number of data per class: ', number_data_per_class)
We will choose a balanced data set. Unfortunately, there are way fewer examples of surface events compared to other event types.
Extracting Waveform Data#
Waveforms are extracted from .hdf5 files using the extract_waveforms function. Key parameters:
Number of channels: 3 (e.g., Z, N, E components)
Sampling rate: 50 Hz
Window length: 100 samples
Bandpass filter: 1–20 Hz
Each event class (earthquakes, explosions, surface events, noise) is balanced to contain an equal number of samples.
# surface events
d_su, id_su = extract_waveforms(df_su, file_su, input_window_length = input_window_length, fs=fs,
start =start, number_data = number_data_per_class, num_channels = num_channels,
shifting = True, lowcut = lowcut , highcut =highcut)
print(d_su.shape)
# noise
d_noise, id_noise = extract_waveforms(df_no, file_noise, input_window_length = input_window_length, fs=fs,
start =start, number_data = number_data_per_class, num_channels = num_channels,
shifting = True, lowcut = lowcut , highcut =highcut)
print(d_noise.shape)
# explosions
d_exp, id_exp = extract_waveforms(df_exp, file_comcat, input_window_length = input_window_length, fs=fs,
start =start, number_data = number_data_per_class, num_channels = num_channels,
shifting = True, lowcut = lowcut , highcut =highcut)
# earthquakes
d_eq, id_eq = extract_waveforms(df_eq, file_comcat, input_window_length = input_window_length, fs=fs,
start =start, number_data = number_data_per_class, num_channels = num_channels,
shifting = True, lowcut = lowcut , highcut =highcut)
print(d_eq.shape)
Now that we have four independent datasets, we will combine them to form a single one.
# concatenate all data to make a single X data set, and prepare one-hot encoding of y for the classes on source type
X = np.concatenate((d_eq,d_exp, d_su,d_noise), axis=0)
y = np.concatenate((np.zeros(d_eq.shape[0]), np.ones(d_exp.shape[0]), 2*np.ones(d_su.shape[0]), 3*np.ones(d_noise.shape[0])), axis=0)
y = y.astype(int)
Prepare Pytorch data sets#
1. Time series as input#
# Make the data a PNWDataSet
custom_dataset = PNWDataSet(X,y,4)
# print the shape of the dataset
print('Shape of the dataset: ', custom_dataset.data.shape)
# prepare training and validation sets
train_size = int(len(custom_dataset) * 0.7)
val_size = int(len(custom_dataset) * 0.2)
test_size = len(custom_dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = random_split(custom_dataset, [train_size, val_size, test_size])
# Create data loaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
# Check the number of samples in each set
print(f"Number of samples in training set: {len(train_dataset)}")
print(f"Number of samples in validation set: {len(val_dataset)}")
print(f"Number of samples in test set: {len(test_dataset)}")
2. Spectrograms as input#
# calculate spectrograms of all X
X_spectrograms=extract_spectrograms(X,fs=fs)
print(X_spectrograms.shape)
# create costum dataset for spectrograms
custom_dataset_spectrograms = PNWDataSet(X_spectrograms,y,4)
# prepare training and validation sets
train_size = int(len(custom_dataset_spectrograms) * 0.7)
val_size = int(len(custom_dataset_spectrograms) * 0.2)
test_size = len(custom_dataset_spectrograms) - train_size - val_size
train_dataset_spectrograms, val_dataset_spectrograms, test_dataset_spectrograms = random_split(custom_dataset_spectrograms, [train_size, val_size, test_size])
# Create data loaders
train_loader_spectrograms = torch.utils.data.DataLoader(train_dataset_spectrograms, batch_size=batch_size, shuffle=True)
val_loader_spectrograms = torch.utils.data.DataLoader(val_dataset_spectrograms, batch_size=batch_size, shuffle=True)
test_loader_spectrograms = torch.utils.data.DataLoader(test_dataset_spectrograms, batch_size=batch_size, shuffle=True)
Model training#
SeismicCNN 1D#
# define a model
model1 = SeismicCNN_1d()
# train the model
from utils import train_model
(train_loss1, val_loss1, val_acc1,training_time, test_loss,test_acc) = train_model(model1,
train_loader,
val_loader,
test_loader,
n_epochs=n_epochs,
learning_rate=learning_rate,
criterion=criterion,
augmentation= False,
patience = 30,
model_path = 'trained_models')
plot_model_training(train_loss1, val_loss1, val_acc1,test_loss,test_acc)
plot_confusion_matrix_and_cr(model1, test_loader,
classes = ['earthquake', 'explosion', 'surface event', 'noise'],
fname = 'plot/confusion_matrix_seismic_cnn1.png')
SeismicCNN 2D#
This is a long skinny neural network that takes spectrograms as inputs.
# define a modelß
model2 = SeismicCNN_2d()
(train_loss2, val_loss2, val_acc2,training_time, test_loss,test_acc) = train_model(model2,
train_loader_spectrograms,
val_loader_spectrograms,
test_loader_spectrograms,
n_epochs=20,
learning_rate=learning_rate,
criterion=criterion,
augmentation= False,
patience = 30,
model_path = 'trained_model_seismiccnn_2d')
plot_model_training(train_loss2, val_loss2, val_acc2,test_loss,test_acc)
plot_confusion_matrix_and_cr(model2,test_loader_spectrograms)
QuakeXNet 1D#
model3 = QuakeXNet_1d()
(train_loss3, val_loss3, val_acc3,training_time, test_loss,test_acc) = train_model(model3,
train_loader,
val_loader,
test_loader,
n_epochs=20,
learning_rate=learning_rate,
criterion=criterion,
augmentation= False,
patience = 30,
model_path = 'trained_model_quakeXNet_1d')
plot_model_training(train_loss3, val_loss3, val_acc3,test_loss,test_acc,title="QuakeXNet (1D)")
plot_confusion_matrix_and_cr(model3,test_loader)
QuakeXNet 2D#
model4 = QuakeXNet_2d()
(train_loss4, val_loss4, val_acc4,training_time, test_loss,test_acc) = train_model(model4,
train_loader_spectrograms,
val_loader_spectrograms,
test_loader_spectrograms,
n_epochs=20,
learning_rate=learning_rate,
criterion=criterion,
augmentation= False,
patience = 30,
model_path = 'trained_model_quakeXNet_2d')
plot_model_training(train_loss4, val_loss4, val_acc4,test_loss,test_acc,title="QuakeXNet (2D)")
plot_confusion_matrix_and_cr(model4,test_loader)
# Print the summary
import torchsummary
print('SeismicCNN(1D) Architecture')
torchsummary.summary(model1, input_size=(3,5000))
print('SeismicCNN(2D) Architecture')
torchsummary.summary(model2, input_size=(3,129,38))
print('QuakeXNet (1D) Architecture')
torchsummary.summary(model3, input_size=(3,5000))
print('QuakeXNet(2D) Architecture')
torchsummary.summary(model4, input_size=(3,129,38))