Fix heart-rate data length/index mismatch - FIT app version

This commit is contained in:
2025-09-14 17:07:33 +02:00
parent 1d005e54c5
commit 3915bc9f12

View File

@@ -27,179 +27,276 @@ from fitparse import FitFile
# === Helper Functions === # === Helper Functions ===
def list_fit_files():
    """
    Build the dropdown options for all .fit files found in ``./fit_files``.

    Files are sorted newest first. The sort date is parsed from the start of
    the file name (``DD.MM.YYYY``, ``YYYY-MM-DD`` or ``YYYYMMDD``); if none of
    these formats match, the file's modification time is used, and
    ``datetime.datetime.min`` if even that cannot be read.

    Returns:
        A list of ``{'label': ..., 'value': ...}`` dicts. ``value`` is the
        path of the file, or one of the sentinel strings ``'NO_FOLDER'``,
        ``'ERROR'`` or ``'NO_FILE'`` in a single placeholder entry when the
        folder is missing, unreadable, or contains no .fit files.
    """
    folder = './fit_files'

    # A missing folder yields a placeholder entry instead of an exception.
    if not os.path.exists(folder):
        print(f"Ordner {folder} existiert nicht!")
        return [{'label': 'Ordner nicht gefunden', 'value': 'NO_FOLDER'}]

    try:
        files = [f for f in os.listdir(folder) if f.lower().endswith('.fit')]
    except OSError as e:
        print(f"Fehler beim Lesen des Ordners: {e}")
        return [{'label': 'Fehler beim Lesen', 'value': 'ERROR'}]

    # (prefix length, strptime format) pairs, tried in this order.
    date_formats = ((10, '%d.%m.%Y'), (10, '%Y-%m-%d'), (8, '%Y%m%d'))

    def extract_date(filename):
        """Return the sort key (a datetime) for *filename*."""
        for prefix_len, fmt in date_formats:
            try:
                return datetime.datetime.strptime(filename[:prefix_len], fmt)
            except ValueError:
                continue
        # No recognizable date in the name: fall back to the mtime.
        try:
            mtime = os.path.getmtime(os.path.join(folder, filename))
            return datetime.datetime.fromtimestamp(mtime)
        except (OSError, OverflowError, ValueError):
            return datetime.datetime.min

    files.sort(key=extract_date, reverse=True)

    if not files:
        return [{'label': 'Keine .fit Dateien gefunden', 'value': 'NO_FILE'}]
    return [{'label': f, 'value': os.path.join(folder, f)} for f in files]
def haversine(lon1, lat1, lon2, lat2):
    """
    Return the great-circle distance in km between two points.

    Args:
        lon1, lat1: Longitude and latitude of the first point, in degrees.
        lon2, lat2: Longitude and latitude of the second point, in degrees.

    Returns:
        The haversine distance in kilometres, using an Earth radius of
        6371 km.
    """
    R = 6371  # Earth radius in km
    dlon = radians(lon2 - lon1)
    dlat = radians(lat2 - lat1)
    a = sin(dlat / 2) ** 2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2) ** 2
    # Rounding can push `a` marginally above 1 for (near-)antipodal points,
    # which would make asin() raise a domain error; clamp to its valid range.
    return 2 * R * asin(min(1.0, sqrt(a)))
def process_fit(file_path):
    """
    Parse a FIT file into a DataFrame of per-record track data.

    Args:
        file_path: Path to the .fit file, or one of the sentinel strings
            ``'NO_FILE'``, ``'NO_FOLDER'`` or ``'ERROR'``.

    Returns:
        A DataFrame with one row per record that has latitude, longitude and
        a timestamp. Besides the renamed raw columns (``lat``, ``lon``,
        ``time``, ``elev``, ``heart_rate``, ``speed_ms``) it contains the
        derived columns ``time_loc``, ``time_diff``, ``time_diff_sec``,
        ``duration_hms``, ``cum_dist_km``, ``delta_elev``, ``rel_elev``,
        ``speed_kmh``, ``vel_kmps``, ``speed_kmh_smooth`` and ``hr_smooth``.
        An empty DataFrame is returned for a sentinel or missing path, when
        the file has no usable records, or when any processing step fails
        (the error is printed, not raised).
    """
    if file_path in ['NO_FILE', 'NO_FOLDER', 'ERROR']:
        print(f"Ungültiger Dateipfad: {file_path}")
        return pd.DataFrame()

    if not os.path.exists(file_path):
        print(f"Datei nicht gefunden: {file_path}")
        return pd.DataFrame()

    try:
        fit_file = FitFile(file_path)
        print(f"Verarbeite FIT-Datei: {file_path}")

        # Collect every field of every "record" message. Heart-rate values
        # are gathered in the same pass so the file is iterated only once.
        records = []
        heart_rate = []
        for record in fit_file.get_messages("record"):
            record_data = {}
            for data in record:
                record_data[data.name] = data.value
                if data.name == 'heart_rate':
                    heart_rate.append(data.value)
            records.append(record_data)

        if not records:
            print("Keine Aufzeichnungsdaten in der FIT-Datei gefunden")
            return pd.DataFrame()

        df = pd.DataFrame(records)
        print(f"DataFrame erstellt mit {len(df)} Zeilen und Spalten: {list(df.columns)}")

        # Debugging: show which columns are available.
        print(f"Verfügbare Spalten: {df.columns.tolist()}")

        # Look for heart-rate data under differently named columns.
        possible_hr_cols = [col for col in df.columns if 'heart' in col.lower() or 'hr' in col.lower()]
        print(f"Mögliche Heart Rate Spalten: {possible_hr_cols}")

        # Candidate column names, in order of preference.
        lat_cols = ['position_lat', 'lat', 'latitude']
        lon_cols = ['position_long', 'lon', 'longitude']
        elev_cols = ['altitude', 'elev', 'elevation', 'enhanced_altitude']
        time_cols = ['timestamp', 'time']
        hr_cols = ['heart_rate', 'hr'] + possible_hr_cols
        speed_cols = ['speed', 'enhanced_speed']

        def first_present(candidates):
            """Return the first candidate that is a column of df, else None."""
            return next((col for col in candidates if col in df.columns), None)

        lat_col = first_present(lat_cols)
        lon_col = first_present(lon_cols)
        elev_col = first_present(elev_cols)
        time_col = first_present(time_cols)
        hr_col = first_present(hr_cols)
        speed_col = first_present(speed_cols)

        # Position and time are mandatory. The error is caught by the outer
        # handler below, so the caller receives an empty DataFrame.
        if not lat_col or not lon_col or not time_col:
            raise ValueError(f"Wichtige Daten fehlen! Lat: {lat_col}, Lon: {lon_col}, Time: {time_col}")

        # Rename to uniform column names; optional columns only if found.
        rename_map = {lat_col: 'lat', lon_col: 'lon', time_col: 'time'}
        if elev_col:
            rename_map[elev_col] = 'elev'
        if hr_col:
            rename_map[hr_col] = 'heart_rate'
        if speed_col:
            rename_map[speed_col] = 'speed_ms'
        df = df.rename(columns=rename_map)

        # Positions may be stored in semicircles instead of degrees. Detect
        # this on absolute values of both coordinates so that tracks with
        # only negative latitudes/longitudes are converted as well.
        df['lat'] = pd.to_numeric(df['lat'], errors='coerce')
        df['lon'] = pd.to_numeric(df['lon'], errors='coerce')
        if df[['lat', 'lon']].abs().max().max() > 180:
            df['lat'] = df['lat'] * (180 / 2**31)
            df['lon'] = df['lon'] * (180 / 2**31)

        # Drop rows without position or timestamp.
        df = df.dropna(subset=['lat', 'lon', 'time']).reset_index(drop=True)
        if df.empty:
            print("Keine Aufzeichnungsdaten in der FIT-Datei gefunden")
            return pd.DataFrame()

        # Basic time columns.
        df['time'] = pd.to_datetime(df['time'])
        df['time_loc'] = df['time'].dt.tz_localize(None)
        df['time_diff'] = df['time'] - df['time'].iloc[0]
        df['time_diff_sec'] = df['time_diff'].dt.total_seconds()
        df['duration_hms'] = df['time_diff'].apply(lambda td: str(td).split('.')[0])

        # Cumulative distance (km) over consecutive points.
        points = list(zip(df['lon'].tolist(), df['lat'].tolist()))
        distances = [0.0]
        for (lon_a, lat_a), (lon_b, lat_b) in zip(points, points[1:]):
            distances.append(distances[-1] + haversine(lon_a, lat_a, lon_b, lat_b))
        df['cum_dist_km'] = distances

        # Elevation handling.
        if 'elev' in df.columns:
            df['elev'] = df['elev'].bfill()
            df['delta_elev'] = df['elev'].diff().fillna(0)
            df['rel_elev'] = df['elev'] - df['elev'].iloc[0]
        else:
            # Fallback when the file carries no elevation.
            df['elev'] = 0
            df['delta_elev'] = 0
            df['rel_elev'] = 0

        # Speed calculation.
        if 'speed_ms' in df.columns:
            # Convert m/s to km/h.
            df['speed_kmh'] = df['speed_ms'] * 3.6
        else:
            # Fallback: derive speed from the GPS track.
            df['delta_t'] = df['time'].diff().dt.total_seconds()
            df['delta_d'] = df['cum_dist_km'].diff()
            df['speed_kmh'] = (df['delta_d'] / df['delta_t']) * 3600
            df['speed_kmh'] = df['speed_kmh'].replace([np.inf, -np.inf], np.nan)

        # Velocity (used in pace calculations).
        df['vel_kmps'] = np.gradient(df['cum_dist_km'], df['time_diff_sec'])

        # Smoothed speed (Gaussian rolling window).
        df['speed_kmh_smooth'] = df['speed_kmh'].rolling(window=10, win_type="gaussian", center=True).mean(std=2)

        # Heart rate: the column taken from the records is overwritten with
        # the values collected directly from the file; the helper truncates
        # or pads them to the frame length.
        # NOTE(review): values are assigned by position, so they can be
        # shifted against the rows when records without position were
        # dropped above - confirm this is intended.
        df = safe_add_column_to_dataframe(df, 'heart_rate', heart_rate)
        df['heart_rate'] = pd.to_numeric(df['heart_rate'], errors='coerce')

        if df['heart_rate'].notna().any():
            df['hr_smooth'] = df['heart_rate'].rolling(window=5, center=True).mean()
            print(f"Heart rate range: {df['heart_rate'].min():.0f} - {df['heart_rate'].max():.0f} bpm")
        else:
            print("Keine Heart Rate Daten gefunden!")
            df['hr_smooth'] = np.nan

        print(f"Verarbeitete FIT-Datei: {len(df)} Datenpunkte")
        print(f"Distanz: {df['cum_dist_km'].iloc[-1]:.2f} km")
        print(f"Dauer: {df['duration_hms'].iloc[-1]}")

        return df

    except Exception as e:
        # Boundary handler: report the problem and hand back an empty frame.
        print(f"Fehler beim Verarbeiten der FIT-Datei {file_path}: {e}")
        return pd.DataFrame()
def safe_add_column_to_dataframe(df, column_name, values):
    """
    Add a column to a DataFrame even if the number of values does not match.

    Values are assigned by position: surplus values are cut off, missing
    values are padded with ``None`` at the end.

    Args:
        df: Target DataFrame; it is modified in place.
        column_name: Name of the column to create or overwrite.
        values: Iterable of values (list, generator, Series, ...) or None.

    Returns:
        The same DataFrame object. An empty DataFrame is returned unchanged,
        without the new column.
    """
    if df.empty:
        return df

    # Materialize once so that generators have a length and a pandas Series
    # is assigned by position instead of being aligned on its index.
    values = [] if values is None else list(values)

    df_len = len(df)
    values_len = len(values)

    if values_len == 0:
        # No values at all - fill the column with missing values.
        print(f"WARNUNG: Keine Werte für {column_name}. Fülle mit NaN.")
        values = [None] * df_len
    elif values_len > df_len:
        # Too many values - truncate.
        print(f"WARNUNG: {column_name} hat {values_len} Werte, DataFrame hat {df_len} Zeilen. Kürze Werte.")
        values = values[:df_len]
    elif values_len < df_len:
        # Too few values - pad the tail.
        print(f"WARNUNG: {column_name} hat {values_len} Werte, DataFrame hat {df_len} Zeilen. Fülle mit NaN auf.")
        values = values + [None] * (df_len - values_len)

    df[column_name] = values
    return df
# ============================================================================= # =============================================================================
# INFO BANNER # INFO BANNER
# ============================================================================= # =============================================================================