Fix for heart-rate data length/index mismatch - FIT app version
This commit is contained in:
367
fit_app.py
367
fit_app.py
@@ -27,179 +27,276 @@ from fitparse import FitFile
|
||||
|
||||
# === Helper Functions ===
|
||||
def list_fit_files():
    """Build dropdown options for the .fit files in ``./fit_files``, newest first.

    Files are ordered by the date found at the start of the file name
    (``DD.MM.YYYY``, ``YYYY-MM-DD`` or ``YYYYMMDD``). If no date can be
    parsed, the file's modification time is used; if that cannot be read
    either, the file sorts last.

    Returns:
        A non-empty list of ``{'label': ..., 'value': ...}`` dicts. For real
        files ``label`` is the file name and ``value`` is its path inside the
        folder. When nothing can be listed, a single placeholder entry is
        returned whose ``value`` is ``'NO_FOLDER'``, ``'ERROR'`` or
        ``'NO_FILE'``.
    """
    folder = './fit_files'

    # The folder must be checked before listing it; otherwise a missing
    # folder raises instead of producing the placeholder entry.
    if not os.path.exists(folder):
        print(f"Ordner {folder} existiert nicht!")
        return [{'label': 'Ordner nicht gefunden', 'value': 'NO_FOLDER'}]

    try:
        files = [f for f in os.listdir(folder) if f.lower().endswith('.fit')]
    except OSError as e:
        print(f"Fehler beim Lesen des Ordners: {e}")
        return [{'label': 'Fehler beim Lesen', 'value': 'ERROR'}]

    # (prefix length, strptime format) pairs, tried in this order.
    date_formats = ((10, '%d.%m.%Y'), (10, '%Y-%m-%d'), (8, '%Y%m%d'))

    def extract_date(filename):
        """Return the sort key (a datetime) for one file name."""
        for prefix_len, fmt in date_formats:
            try:
                return datetime.datetime.strptime(filename[:prefix_len], fmt)
            except ValueError:
                continue
        # No date in the name: fall back to the file's modification time.
        try:
            mtime = os.path.getmtime(os.path.join(folder, filename))
            return datetime.datetime.fromtimestamp(mtime)
        except (OSError, OverflowError, ValueError):
            return datetime.datetime.min

    files.sort(key=extract_date, reverse=True)

    if not files:
        # Placeholder entry so the dropdown never receives an empty list.
        return [{'label': 'Keine .fit Dateien gefunden', 'value': 'NO_FILE'}]

    return [{'label': f, 'value': os.path.join(folder, f)} for f in files]
|
||||
|
||||
def haversine(lon1, lat1, lon2, lat2):
    """Return the great-circle distance in km between two GPS coordinates.

    Args:
        lon1, lat1: Longitude and latitude of the first point, in degrees.
        lon2, lat2: Longitude and latitude of the second point, in degrees.

    Returns:
        The distance in kilometres, using an Earth radius of 6371 km.
    """
    R = 6371  # Earth radius in km
    dlon = radians(lon2 - lon1)
    dlat = radians(lat2 - lat1)
    a = sin(dlat / 2) ** 2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2) ** 2
    # Rounding can push `a` marginally outside [0, 1], which would make
    # sqrt/asin raise a domain error; clamp it first.
    a = min(1.0, max(0.0, a))
    return 2 * R * asin(sqrt(a))
|
||||
|
||||
def process_fit(file_path):
    """Parse a FIT file into a DataFrame of GPS samples with derived metrics.

    Every "record" message of the file becomes one row. Position, time,
    elevation, heart-rate and speed columns are renamed to ``lat``, ``lon``,
    ``time``, ``elev``, ``heart_rate`` and ``speed_ms``; rows without
    position or time are dropped. Derived columns are then added: elapsed
    time, cumulative distance, elevation deltas, speed (raw and smoothed)
    and smoothed heart rate.

    Args:
        file_path: Path to a .fit file, or one of the placeholder values
            ``'NO_FILE'``, ``'NO_FOLDER'``, ``'ERROR'``.

    Returns:
        The processed DataFrame. An empty DataFrame is returned for a
        placeholder value, a missing file, a file without record messages,
        or when any error occurs during processing (the error is printed).
    """
    if file_path in ('NO_FILE', 'NO_FOLDER', 'ERROR'):
        print(f"Ungültiger Dateipfad: {file_path}")
        return pd.DataFrame()

    if not os.path.exists(file_path):
        print(f"Datei nicht gefunden: {file_path}")
        return pd.DataFrame()

    try:
        fit_file = FitFile(file_path)
        print(f"Verarbeite FIT-Datei: {file_path}")

        # One dict per record message, holding every field it carries.
        records = [
            {data.name: data.value for data in record}
            for record in fit_file.get_messages("record")
        ]

        if not records:
            print("Keine Aufzeichnungsdaten in der FIT-Datei gefunden")
            return pd.DataFrame()

        df = pd.DataFrame(records)
        print(f"DataFrame erstellt mit {len(df)} Zeilen und Spalten: {list(df.columns)}")
        print(f"Verfügbare Spalten: {df.columns.tolist()}")

        # Heart rate may appear under various names depending on the device.
        possible_hr_cols = [col for col in df.columns if 'heart' in col.lower() or 'hr' in col.lower()]
        print(f"Mögliche Heart Rate Spalten: {possible_hr_cols}")

        def first_present(candidates):
            """Return the first candidate that is a column of df, else None."""
            return next((col for col in candidates if col in df.columns), None)

        lat_col = first_present(['position_lat', 'lat', 'latitude'])
        lon_col = first_present(['position_long', 'lon', 'longitude'])
        elev_col = first_present(['altitude', 'elev', 'elevation', 'enhanced_altitude'])
        time_col = first_present(['timestamp', 'time'])
        hr_col = first_present(['heart_rate', 'hr'] + possible_hr_cols)
        speed_col = first_present(['speed', 'enhanced_speed'])

        if not lat_col or not lon_col or not time_col:
            raise ValueError(f"Wichtige Daten fehlen! Lat: {lat_col}, Lon: {lon_col}, Time: {time_col}")

        # Only map the columns that were actually found.
        rename_map = {lat_col: 'lat', lon_col: 'lon', time_col: 'time'}
        if elev_col:
            rename_map[elev_col] = 'elev'
        if hr_col:
            rename_map[hr_col] = 'heart_rate'
        if speed_col:
            rename_map[speed_col] = 'speed_ms'
        df = df.rename(columns=rename_map)

        df['lat'] = pd.to_numeric(df['lat'], errors='coerce')
        df['lon'] = pd.to_numeric(df['lon'], errors='coerce')

        # Positions stored as semicircles are far outside the degree range.
        # Compare absolute values so tracks with only negative coordinates
        # (southern / western hemisphere) are detected as well.
        if (df[['lat', 'lon']].abs() > 180).any().any():
            df['lat'] = df['lat'] * (180 / 2**31)
            df['lon'] = df['lon'] * (180 / 2**31)

        # Drop rows without GPS data.
        df = df.dropna(subset=['lat', 'lon', 'time']).reset_index(drop=True)

        # Basic cleanup
        df['time'] = pd.to_datetime(df['time'])
        df['time_loc'] = df['time'].dt.tz_localize(None)
        df['time_diff'] = df['time'] - df['time'].iloc[0]
        df['time_diff_sec'] = df['time_diff'].dt.total_seconds()
        df['duration_hms'] = df['time_diff'].apply(lambda td: str(td).split('.')[0])

        # Cumulative distance (km); plain lists avoid a df.loc lookup per row.
        lons = df['lon'].tolist()
        lats = df['lat'].tolist()
        distances = [0.0]
        for i in range(1, len(lons)):
            step = haversine(lons[i - 1], lats[i - 1], lons[i], lats[i])
            distances.append(distances[-1] + step)
        df['cum_dist_km'] = distances

        # Elevation handling
        if 'elev' in df.columns:
            df['elev'] = df['elev'].bfill()
            df['delta_elev'] = df['elev'].diff().fillna(0)
            df['rel_elev'] = df['elev'] - df['elev'].iloc[0]
        else:
            # Fallback when the file carries no elevation.
            df['elev'] = 0
            df['delta_elev'] = 0
            df['rel_elev'] = 0

        # Speed calculation
        if 'speed_ms' in df.columns:
            df['speed_kmh'] = df['speed_ms'] * 3.6  # m/s -> km/h
        else:
            # Fallback: derive speed from the GPS track.
            df['delta_t'] = df['time'].diff().dt.total_seconds()
            df['delta_d'] = df['cum_dist_km'].diff()
            df['speed_kmh'] = (df['delta_d'] / df['delta_t']) * 3600
            df['speed_kmh'] = df['speed_kmh'].replace([np.inf, -np.inf], np.nan)

        # Velocity (used in pace calculations)
        df['vel_kmps'] = np.gradient(df['cum_dist_km'], df['time_diff_sec'])

        # Smoothed speed (Gaussian rolling)
        df['speed_kmh_smooth'] = df['speed_kmh'].rolling(window=10, win_type="gaussian", center=True).mean(std=2)

        # Heart rate handling. The per-record column is already aligned with
        # the GPS rows, so it is preferred: assigning a compacted list by
        # position shifts the values whenever rows were dropped above or a
        # record had no heart rate.
        if 'heart_rate' in df.columns:
            df['heart_rate'] = pd.to_numeric(df['heart_rate'], errors='coerce')

        if 'heart_rate' not in df.columns or not df['heart_rate'].notna().any():
            # Workaround for files whose kept rows carry no heart rate:
            # fall back to the positional list of all recorded values.
            heart_rate = [rec['heart_rate'] for rec in records if 'heart_rate' in rec]
            if heart_rate:
                df = safe_add_column_to_dataframe(df, 'heart_rate', heart_rate)
                df['heart_rate'] = pd.to_numeric(df['heart_rate'], errors='coerce')

        if 'heart_rate' in df.columns and df['heart_rate'].notna().any():
            df['hr_smooth'] = df['heart_rate'].rolling(window=5, center=True).mean()
            print(f"Heart rate range: {df['heart_rate'].min():.0f} - {df['heart_rate'].max():.0f} bpm")
        else:
            print("Keine Heart Rate Daten gefunden!")
            df['heart_rate'] = np.nan
            df['hr_smooth'] = np.nan

        print(f"Verarbeitete FIT-Datei: {len(df)} Datenpunkte")
        print(f"Distanz: {df['cum_dist_km'].iloc[-1]:.2f} km")
        print(f"Dauer: {df['duration_hms'].iloc[-1]}")

        return df

    except Exception as e:
        # Top-level boundary: any failure yields an empty frame so the
        # caller never has to handle an exception from this function.
        print(f"Fehler beim Verarbeiten der FIT-Datei {file_path}: {str(e)}")
        return pd.DataFrame()
|
||||
|
||||
|
||||
|
||||
|
||||
def safe_add_column_to_dataframe(df, column_name, values):
    """Add a column to a DataFrame even when the value count does not match.

    Values are assigned by position. Surplus values are cut off, missing
    values are padded with ``None`` at the end, and a warning is printed in
    either case.

    Args:
        df: Target DataFrame; it is modified in place.
        column_name: Name of the column to create or overwrite.
        values: Iterable of values (list, tuple, iterator, ...). ``None`` or
            a non-iterable is treated as "no values".

    Returns:
        The same DataFrame. An empty DataFrame is returned unchanged,
        without the new column.
    """
    if df.empty:
        return df

    df_len = len(df)

    # Materialise once so iterables without __len__ (generators, iterators)
    # are counted correctly instead of being treated as empty.
    try:
        items = [] if values is None else list(values)
    except TypeError:
        items = []
    values_len = len(items)

    if values_len == 0:
        # No values at all - fill the whole column.
        print(f"WARNUNG: Keine Werte für {column_name}. Fülle mit NaN.")
        df[column_name] = [None] * df_len
    elif values_len > df_len:
        # Too many values - truncate.
        print(f"WARNUNG: {column_name} hat {values_len} Werte, DataFrame hat {df_len} Zeilen. Kürze Werte.")
        df[column_name] = items[:df_len]
    elif values_len < df_len:
        # Too few values - pad the tail.
        print(f"WARNUNG: {column_name} hat {values_len} Werte, DataFrame hat {df_len} Zeilen. Fülle mit NaN auf.")
        df[column_name] = items + [None] * (df_len - values_len)
    else:
        df[column_name] = items

    return df
|
||||
|
||||
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# INFO BANNER
|
||||
# =============================================================================
|
||||
|
||||
Reference in New Issue
Block a user