Fix heart-rate data length / index mismatch - FIT app version
This commit is contained in:
367
fit_app.py
367
fit_app.py
@@ -27,179 +27,276 @@ from fitparse import FitFile
|
|||||||
|
|
||||||
# === Helper Functions ===
|
# === Helper Functions ===
|
||||||
def list_fit_files(folder='./fit_files'):
    """
    List all .fit files in ``folder`` as dropdown options, newest first.

    Files are ordered by a date parsed from the start of the file name
    (``DD.MM.YYYY``, ``YYYY-MM-DD`` or ``YYYYMMDD``). When no date can be
    parsed, the file's modification time is used; if that cannot be read
    either, the file sorts last.

    Args:
        folder: Directory to scan. Defaults to ``./fit_files``.

    Returns:
        A list of ``{'label': ..., 'value': ...}`` dicts, where ``label`` is
        the file name and ``value`` is ``folder`` joined with the file name.
        If the folder does not exist, cannot be read, or holds no .fit
        files, a single placeholder entry with the value ``'NO_FOLDER'``,
        ``'ERROR'`` or ``'NO_FILE'`` is returned, so the result is never
        empty.
    """
    if not os.path.exists(folder):
        print(f"Ordner {folder} existiert nicht!")
        return [{'label': 'Ordner nicht gefunden', 'value': 'NO_FOLDER'}]

    try:
        files = [f for f in os.listdir(folder) if f.lower().endswith('.fit')]
    except OSError as e:
        print(f"Fehler beim Lesen des Ordners: {e}")
        return [{'label': 'Fehler beim Lesen', 'value': 'ERROR'}]

    # (prefix length, strptime format) pairs, tried in this order.
    date_formats = ((10, '%d.%m.%Y'), (10, '%Y-%m-%d'), (8, '%Y%m%d'))

    def extract_date(filename):
        """Return the datetime used as the sort key for ``filename``."""
        for prefix_len, fmt in date_formats:
            try:
                return datetime.datetime.strptime(filename[:prefix_len], fmt)
            except ValueError:
                continue
        # No recognisable date in the name: fall back to the file's mtime.
        try:
            mtime = os.path.getmtime(os.path.join(folder, filename))
            return datetime.datetime.fromtimestamp(mtime)
        except (OSError, OverflowError, ValueError):
            return datetime.datetime.min

    # Newest first.
    files.sort(key=extract_date, reverse=True)

    if not files:
        return [{'label': 'Keine .fit Dateien gefunden', 'value': 'NO_FILE'}]
    return [{'label': f, 'value': os.path.join(folder, f)} for f in files]
|
|
||||||
|
|
||||||
def haversine(lon1, lat1, lon2, lat2):
    """
    Return the great-circle distance between two GPS coordinates in km.

    Args:
        lon1: Longitude of the first point in degrees.
        lat1: Latitude of the first point in degrees.
        lon2: Longitude of the second point in degrees.
        lat2: Latitude of the second point in degrees.

    Returns:
        The haversine distance in kilometres, using an Earth radius of
        6371 km.
    """
    R = 6371  # Earth radius in km
    dlon = radians(lon2 - lon1)
    dlat = radians(lat2 - lat1)
    a = sin(dlat / 2) ** 2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1], which
    # would make sqrt/asin raise ValueError; clamp it to the valid domain.
    a = min(1.0, max(0.0, a))
    return 2 * R * asin(sqrt(a))
|
||||||
|
|
||||||
def process_fit(file_path):
    """
    Parse a FIT file and build a DataFrame with derived activity columns.

    All "record" messages are read into a DataFrame; position, time,
    elevation, heart-rate and speed columns are renamed to a common scheme
    (``lat``, ``lon``, ``time``, ``elev``, ``heart_rate``, ``speed_ms``) and
    the following columns are derived: ``time_loc``, ``time_diff``,
    ``time_diff_sec``, ``duration_hms``, ``cum_dist_km``, ``delta_elev``,
    ``rel_elev``, ``speed_kmh``, ``vel_kmps``, ``speed_kmh_smooth`` and
    ``hr_smooth``.

    Args:
        file_path: Path to the .fit file, or one of the placeholder values
            ``'NO_FILE'``, ``'NO_FOLDER'``, ``'ERROR'``.

    Returns:
        The processed DataFrame. An empty DataFrame is returned when the
        path is a placeholder, missing or falsy, when the file holds no
        usable records, or when any error occurs during processing (the
        error is printed, not raised).
    """
    if not file_path or file_path in ('NO_FILE', 'NO_FOLDER', 'ERROR'):
        print(f"Ungültiger Dateipfad: {file_path}")
        return pd.DataFrame()

    if not os.path.exists(file_path):
        print(f"Datei nicht gefunden: {file_path}")
        return pd.DataFrame()

    def first_present(candidates, columns):
        """Return the first candidate name contained in ``columns``, else None."""
        return next((col for col in candidates if col in columns), None)

    try:
        fit_file = FitFile(file_path)
        print(f"Verarbeite FIT-Datei: {file_path}")

        # Collect every field of every record message. Heart-rate values are
        # gathered in the same pass so the messages are only walked once.
        records = []
        heart_rate = []
        for record in fit_file.get_messages("record"):
            record_data = {}
            for data in record:
                record_data[data.name] = data.value
                if data.name == 'heart_rate':
                    heart_rate.append(data.value)
            records.append(record_data)

        if not records:
            print("Keine Aufzeichnungsdaten in der FIT-Datei gefunden")
            return pd.DataFrame()

        df = pd.DataFrame(records)
        print(f"DataFrame erstellt mit {len(df)} Zeilen und Spalten: {list(df.columns)}")

        # Debugging: show which columns are available.
        print(f"Verfügbare Spalten: {df.columns.tolist()}")

        # Look for heart-rate columns under differing names.
        possible_hr_cols = [col for col in df.columns if 'heart' in col.lower() or 'hr' in col.lower()]
        print(f"Mögliche Heart Rate Spalten: {possible_hr_cols}")

        # Candidate column names used by different FIT producers.
        lat_col = first_present(['position_lat', 'lat', 'latitude'], df.columns)
        lon_col = first_present(['position_long', 'lon', 'longitude'], df.columns)
        elev_col = first_present(['altitude', 'elev', 'elevation', 'enhanced_altitude'], df.columns)
        time_col = first_present(['timestamp', 'time'], df.columns)
        hr_col = first_present(['heart_rate', 'hr'] + possible_hr_cols, df.columns)
        speed_col = first_present(['speed', 'enhanced_speed'], df.columns)

        # Position and time are mandatory for everything below.
        if not lat_col or not lon_col or not time_col:
            raise ValueError(f"Wichtige Daten fehlen! Lat: {lat_col}, Lon: {lon_col}, Time: {time_col}")

        # Rename to the common scheme; optional columns are only mapped when
        # they were actually found (no None keys in the mapping).
        rename_map = {lat_col: 'lat', lon_col: 'lon', time_col: 'time'}
        if elev_col:
            rename_map[elev_col] = 'elev'
        if hr_col:
            rename_map[hr_col] = 'heart_rate'
        if speed_col:
            rename_map[speed_col] = 'speed_ms'
        df = df.rename(columns=rename_map)

        # FIT lat/lon are often stored in semicircles - convert to degrees.
        # Absolute values are checked so that tracks consisting only of
        # negative coordinates are detected as well.
        if df[['lat', 'lon']].abs().max().max() > 180:
            df['lat'] = df['lat'] * (180 / 2**31)
            df['lon'] = df['lon'] * (180 / 2**31)

        # Drop rows without GPS data.
        df = df.dropna(subset=['lat', 'lon', 'time']).reset_index(drop=True)
        if df.empty:
            print("Keine gültigen GPS-Datenpunkte in der FIT-Datei gefunden")
            return pd.DataFrame()

        # Basic cleanup
        df['time'] = pd.to_datetime(df['time'])
        df['time_loc'] = df['time'].dt.tz_localize(None)
        df['time_diff'] = df['time'] - df['time'].iloc[0]
        df['time_diff_sec'] = df['time_diff'].dt.total_seconds()
        df['duration_hms'] = df['time_diff'].apply(lambda td: str(td).split('.')[0])

        # Cumulative distance (km): running sum of point-to-point distances.
        lons = df['lon'].to_numpy()
        lats = df['lat'].to_numpy()
        distances = [0]
        for lon_a, lat_a, lon_b, lat_b in zip(lons[:-1], lats[:-1], lons[1:], lats[1:]):
            distances.append(distances[-1] + haversine(lon_a, lat_a, lon_b, lat_b))
        df['cum_dist_km'] = distances

        # Elevation handling
        if 'elev' in df.columns:
            df['elev'] = df['elev'].bfill()
            df['delta_elev'] = df['elev'].diff().fillna(0)
            df['rel_elev'] = df['elev'] - df['elev'].iloc[0]
        else:
            # Fallback when no elevation is available.
            df['elev'] = 0
            df['delta_elev'] = 0
            df['rel_elev'] = 0

        # Speed calculation
        if 'speed_ms' in df.columns:
            # Convert m/s to km/h.
            df['speed_kmh'] = df['speed_ms'] * 3.6
        else:
            # Fallback: derive speed from the GPS track.
            df['delta_t'] = df['time'].diff().dt.total_seconds()
            df['delta_d'] = df['cum_dist_km'].diff()
            df['speed_kmh'] = (df['delta_d'] / df['delta_t']) * 3600
            df['speed_kmh'] = df['speed_kmh'].replace([np.inf, -np.inf], np.nan)

        # Velocity (used in pace calculations)
        df['vel_kmps'] = np.gradient(df['cum_dist_km'], df['time_diff_sec'])

        # Smoothed speed (Gaussian rolling)
        df['speed_kmh_smooth'] = df['speed_kmh'].rolling(window=10, win_type="gaussian", center=True).mean(std=2)

        # Heart rate handling: overwrite the column with the values collected
        # directly from the record messages (workaround for NaN problems with
        # the column built by the DataFrame constructor). Only done when
        # values were found, so an existing column is not wiped.
        # NOTE(review): the values are paired with rows by position, after
        # rows without GPS data were dropped - confirm this alignment is
        # what is intended.
        if heart_rate:
            df = safe_add_column_to_dataframe(df, 'heart_rate', heart_rate)

        if 'heart_rate' in df.columns:
            df['heart_rate'] = pd.to_numeric(df['heart_rate'], errors='coerce')
            df['hr_smooth'] = df['heart_rate'].rolling(window=5, center=True).mean()
            print(f"Heart rate range: {df['heart_rate'].min():.0f} - {df['heart_rate'].max():.0f} bpm")
        else:
            print("Keine Heart Rate Daten gefunden!")
            df['heart_rate'] = np.nan
            df['hr_smooth'] = np.nan

        print(f"Verarbeitete FIT-Datei: {len(df)} Datenpunkte")
        print(f"Distanz: {df['cum_dist_km'].iloc[-1]:.2f} km")
        print(f"Dauer: {df['duration_hms'].iloc[-1]}")

        return df

    except Exception as e:
        # Top-level boundary: callers expect an empty DataFrame on failure.
        print(f"Fehler beim Verarbeiten der FIT-Datei {file_path}: {str(e)}")
        return pd.DataFrame()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def safe_add_column_to_dataframe(df, column_name, values):
    """
    Add a column to a DataFrame even when the lengths do not match.

    The values are assigned by position. Surplus values are cut off and
    missing values are padded with ``None``. A warning is printed whenever
    the lengths differ.

    Args:
        df: DataFrame to extend. It is modified in place.
        column_name: Name of the column to create or overwrite.
        values: Iterable of values (list, generator, Series, ...). ``None``
            or a non-iterable is treated as "no values".

    Returns:
        The same DataFrame object. An empty DataFrame is returned unchanged,
        without the new column.
    """
    if df.empty:
        return df

    # Materialise the input so generators work and a Series is assigned by
    # position instead of being aligned on its index.
    try:
        items = [] if values is None else list(values)
    except TypeError:
        items = []

    df_len = len(df)
    values_len = len(items)

    if values_len == 0:
        # No values at all - fill the whole column with missing values.
        print(f"WARNUNG: Keine Werte für {column_name}. Fülle mit NaN.")
        items = [None] * df_len
    elif values_len > df_len:
        # Too many values - truncate.
        print(f"WARNUNG: {column_name} hat {values_len} Werte, DataFrame hat {df_len} Zeilen. Kürze Werte.")
        items = items[:df_len]
    elif values_len < df_len:
        # Too few values - pad at the end.
        print(f"WARNUNG: {column_name} hat {values_len} Werte, DataFrame hat {df_len} Zeilen. Fülle mit NaN auf.")
        items = items + [None] * (df_len - values_len)

    df[column_name] = items
    return df
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# INFO BANNER
|
# INFO BANNER
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|||||||
Reference in New Issue
Block a user