# Wetterdienst: https://pypi.org/project/wetterdienst/
from wetterdienst import Settings
from wetterdienst.provider.dwd.observation import DwdObservationRequest, DwdObservationMetadata
# DWD - Stations with station IDs: https://
# Request the daily climate summary for the years 1975-2024
# from the following DWD stations:
#    183 - Arkona
#    722 - Brocken
#   2667 - Köln-Bonn
#   2932 - Leipzig
#   5792 - Zugspitze
station_ids = [183, 722, 2667, 2932, 5792]

request_settings = Settings(
    ts_shape="wide",  # wide layout: one column per parameter
    ts_humanize=True,  # humanized parameters
    ts_convert_units=False,  # do not convert values to SI units
)

request = DwdObservationRequest(
    parameters=DwdObservationMetadata.daily.climate_summary,
    start_date="1975-01-01",
    end_date="2024-12-31",
    settings=request_settings,
)
# Restrict the request to the selected stations.
request = request.filter_by_station_id(station_ids)
# Load all values, convert them to a pandas DataFrame and index it by date
df = request.values.all().df.to_pandas().set_index("date")

# Drop columns with quality flags ("qn_" prefix) and columns with just one distinct value
is_quality_flag = df.columns.str.startswith("qn_")
varies = df.nunique() != 1
df_selected = df.loc[:, ~is_quality_flag & varies]

# Remove timezone from datetime index, this triggers warnings in some tsa libraries.
# tz_convert(None) converts the index to UTC and drops the timezone information.
df_selected.index = df_selected.index.tz_convert(None)

# Print a summary of the selected columns
df_selected.info()

# Output of the call above (recorded from a previous run):
#
# <class 'pandas.core.frame.DataFrame'>
# DatetimeIndex: 91315 entries, 1975-01-01 to 2024-12-31
# Data columns (total 15 columns):
# # Column Non-Null Count Dtype
# --- ------ -------------- -----
# 0 station_id 91315 non-null object
# 1 wind_gust_max 86857 non-null float64
# 2 wind_speed 90284 non-null float64
# 3 precipitation_height 90913 non-null float64
# 4 precipitation_form 91086 non-null float64
# 5 sunshine_duration 91054 non-null float64
# 6 snow_depth 89281 non-null float64
# 7 cloud_cover_total 91179 non-null float64
# 8 pressure_vapor 91260 non-null float64
# 9 pressure_air_site 91289 non-null float64
# 10 temperature_air_mean_2m 91304 non-null float64
# 11 humidity 91260 non-null float64
# 12 temperature_air_max_2m 91304 non-null float64
# 13 temperature_air_min_2m 91304 non-null float64
# 14 temperature_air_min_0_05m 72718 non-null float64
# dtypes: float64(14), object(1)
# memory usage: 11.1+ MB
# Save data station-wise: one semicolon-separated CSV file per station,
# with the date index written as the first column.
from pathlib import Path

# to_csv does not create missing directories, so make sure the target exists.
Path("data").mkdir(parents=True, exist_ok=True)

# Split the frame once per station instead of re-filtering it for every ID;
# sort=False keeps the stations in order of first appearance.
for station_id, station_data in df_selected.groupby("station_id", sort=False):
    # The station ID is already part of the file name, so drop the column.
    station_data = station_data.drop(columns=["station_id"])
    station_data.to_csv(f"data/dwd_{station_id}_climate.csv", sep=';', index=True)