# === Helper Functions ===
def list_fit_files():
    """Return dropdown options for the ``.fit`` files in ``./fit_files``.

    Files are ordered newest first. The date is parsed from the start of the
    file name (``DD.MM.YYYY``, ``YYYY-MM-DD`` or ``YYYYMMDD``); when no date
    can be parsed the file's modification time is used instead.

    Returns:
        A list of ``{'label': ..., 'value': ...}`` dicts. ``value`` is the
        path of the file. When nothing can be listed a single placeholder
        entry is returned whose ``value`` is ``'NO_FOLDER'``, ``'ERROR'`` or
        ``'NO_FILE'`` so the caller always receives at least one option.
    """
    folder = './fit_files'

    if not os.path.exists(folder):
        print(f"Ordner {folder} existiert nicht!")
        return [{'label': 'Ordner nicht gefunden', 'value': 'NO_FOLDER'}]

    try:
        files = [f for f in os.listdir(folder) if f.lower().endswith('.fit')]
    except OSError as e:
        print(f"Fehler beim Lesen des Ordners: {e}")
        return [{'label': 'Fehler beim Lesen', 'value': 'ERROR'}]

    # (strptime format, number of leading file-name characters to parse),
    # tried in this order.
    date_formats = (('%d.%m.%Y', 10), ('%Y-%m-%d', 10), ('%Y%m%d', 8))

    def extract_date(filename):
        """Return the sort key (a datetime) for one file name."""
        for fmt, width in date_formats:
            try:
                return datetime.datetime.strptime(filename[:width], fmt)
            except ValueError:
                continue
        # No date in the name: fall back to the modification time.
        try:
            mtime = os.path.getmtime(os.path.join(folder, filename))
            return datetime.datetime.fromtimestamp(mtime)
        except (OSError, OverflowError, ValueError):
            # Unreadable file: sort it to the very end.
            return datetime.datetime.min

    files.sort(key=extract_date, reverse=True)

    if not files:
        return [{'label': 'Keine .fit Dateien gefunden', 'value': 'NO_FILE'}]
    return [{'label': f, 'value': os.path.join(folder, f)} for f in files]


def haversine(lon1, lat1, lon2, lat2):
    """Return the great-circle distance in km between two points.

    Args:
        lon1, lat1: Longitude and latitude of the first point, in degrees.
        lon2, lat2: Longitude and latitude of the second point, in degrees.
    """
    R = 6371  # Earth radius in km
    dlon = radians(lon2 - lon1)
    dlat = radians(lat2 - lat1)
    a = sin(dlat / 2) ** 2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2) ** 2
    # Rounding can push `a` marginally above 1 for (near-)antipodal points,
    # which would make asin() raise a math domain error.
    a = min(a, 1.0)
    return 2 * R * asin(sqrt(a))


def process_fit(file_path):
    """Parse a FIT file into a DataFrame of per-record track data.

    Args:
        file_path: Path of the FIT file, or one of the placeholder values
            produced by ``list_fit_files`` (``'NO_FILE'``, ``'NO_FOLDER'``,
            ``'ERROR'``).

    Returns:
        A DataFrame with one row per record that has latitude, longitude and
        timestamp. Besides the raw FIT fields it contains ``lat``, ``lon``,
        ``time``, ``time_loc``, ``time_diff``, ``time_diff_sec``,
        ``duration_hms``, ``cum_dist_km``, ``elev``, ``delta_elev``,
        ``rel_elev``, ``speed_kmh``, ``vel_kmps``, ``speed_kmh_smooth``,
        ``heart_rate`` and ``hr_smooth``. An empty DataFrame is returned for
        placeholder/missing paths, for files without usable records, and
        when any error occurs while processing (the error is printed).
    """
    if not file_path or file_path in ('NO_FILE', 'NO_FOLDER', 'ERROR'):
        print(f"Ungültiger Dateipfad: {file_path}")
        return pd.DataFrame()

    if not os.path.exists(file_path):
        print(f"Datei nicht gefunden: {file_path}")
        return pd.DataFrame()

    try:
        # Imported here so this block does not depend on a module-level
        # import being present; a missing package is reported like any other
        # processing error.
        from fitparse import FitFile

        fit_file = FitFile(file_path)
        print(f"Verarbeite FIT-Datei: {file_path}")

        # Collect every field of every record. The heart rate is additionally
        # kept as exactly one value per record (None when absent) so it stays
        # aligned with the DataFrame rows.
        records = []
        heart_rate = []
        for record in fit_file.get_messages("record"):
            record_data = {}
            record_hr = None
            for data in record:
                record_data[data.name] = data.value
                if data.name == 'heart_rate' and record_hr is None:
                    record_hr = data.value
            records.append(record_data)
            heart_rate.append(record_hr)

        if not records:
            print("Keine Aufzeichnungsdaten in der FIT-Datei gefunden")
            return pd.DataFrame()

        df = pd.DataFrame(records)
        print(f"DataFrame erstellt mit {len(df)} Zeilen und Spalten: {list(df.columns)}")
        print(f"Verfügbare Spalten: {df.columns.tolist()}")

        # Any column whose name hints at heart rate is a fallback candidate.
        possible_hr_cols = [col for col in df.columns if 'heart' in col.lower() or 'hr' in col.lower()]
        print(f"Mögliche Heart Rate Spalten: {possible_hr_cols}")

        # Candidate column names, in order of preference.
        lat_cols = ['position_lat', 'lat', 'latitude']
        lon_cols = ['position_long', 'lon', 'longitude']
        elev_cols = ['altitude', 'elev', 'elevation', 'enhanced_altitude']
        time_cols = ['timestamp', 'time']
        hr_cols = ['heart_rate', 'hr'] + possible_hr_cols
        speed_cols = ['speed', 'enhanced_speed']

        def first_present(candidates):
            """Return the first candidate that is a column of df, else None."""
            return next((col for col in candidates if col in df.columns), None)

        lat_col = first_present(lat_cols)
        lon_col = first_present(lon_cols)
        elev_col = first_present(elev_cols)
        time_col = first_present(time_cols)
        hr_col = first_present(hr_cols)
        speed_col = first_present(speed_cols)

        if not lat_col or not lon_col or not time_col:
            raise ValueError(f"Wichtige Daten fehlen! Lat: {lat_col}, Lon: {lon_col}, Time: {time_col}")

        # Rename to uniform names; optional columns only when they exist.
        rename_map = {lat_col: 'lat', lon_col: 'lon', time_col: 'time'}
        if elev_col:
            rename_map[elev_col] = 'elev'
        if hr_col:
            rename_map[hr_col] = 'heart_rate'
        if speed_col:
            rename_map[speed_col] = 'speed_ms'
        df = df.rename(columns=rename_map)

        df['lat'] = pd.to_numeric(df['lat'], errors='coerce')
        df['lon'] = pd.to_numeric(df['lon'], errors='coerce')

        # Values beyond +/-180 cannot be degrees, so treat them as FIT
        # semicircles. The absolute value matters: a track with only negative
        # latitudes would never exceed +180.
        if df[['lat', 'lon']].abs().max().max() > 180:
            semicircle_to_deg = 180 / 2**31
            df['lat'] = df['lat'] * semicircle_to_deg
            df['lon'] = df['lon'] * semicircle_to_deg

        # Drop rows without GPS fix / timestamp, remembering which records
        # survived so per-record data can be aligned afterwards.
        df = df.dropna(subset=['lat', 'lon', 'time'])
        kept_rows = df.index.tolist()
        df = df.reset_index(drop=True)

        if df.empty:
            print("Keine gültigen GPS-Datenpunkte in der FIT-Datei gefunden")
            return pd.DataFrame()

        # Time columns
        df['time'] = pd.to_datetime(df['time'])
        df['time_loc'] = df['time'].dt.tz_localize(None)
        df['time_diff'] = df['time'] - df['time'].iloc[0]
        df['time_diff_sec'] = df['time_diff'].dt.total_seconds()
        df['duration_hms'] = df['time_diff'].apply(lambda td: str(td).split('.')[0])

        # Cumulative distance (km) over consecutive points.
        points = list(zip(df['lon'].tolist(), df['lat'].tolist()))
        distances = [0]
        for (lon_a, lat_a), (lon_b, lat_b) in zip(points, points[1:]):
            distances.append(distances[-1] + haversine(lon_a, lat_a, lon_b, lat_b))
        df['cum_dist_km'] = distances

        # Elevation
        if 'elev' in df.columns:
            df['elev'] = df['elev'].bfill()
            df['delta_elev'] = df['elev'].diff().fillna(0)
            df['rel_elev'] = df['elev'] - df['elev'].iloc[0]
        else:
            # No elevation recorded: use a flat profile.
            df['elev'] = 0
            df['delta_elev'] = 0
            df['rel_elev'] = 0

        # Speed
        if 'speed_ms' in df.columns:
            df['speed_kmh'] = df['speed_ms'] * 3.6  # m/s -> km/h
        else:
            # Fallback: derive the speed from the GPS track.
            df['delta_t'] = df['time'].diff().dt.total_seconds()
            df['delta_d'] = df['cum_dist_km'].diff()
            df['speed_kmh'] = (df['delta_d'] / df['delta_t']) * 3600
            df['speed_kmh'] = df['speed_kmh'].replace([np.inf, -np.inf], np.nan)

        # Velocity in km/s (used in pace calculations)
        df['vel_kmps'] = np.gradient(df['cum_dist_km'], df['time_diff_sec'])

        # Smoothed speed (Gaussian rolling window)
        df['speed_kmh_smooth'] = df['speed_kmh'].rolling(window=10, win_type="gaussian", center=True).mean(std=2)

        # Heart rate: overwrite the column with the values read directly from
        # the 'heart_rate' fields, picked for exactly the records that
        # survived the GPS filter so every value stays on its own timestamp.
        aligned_hr = [heart_rate[i] for i in kept_rows]
        if any(value is not None for value in aligned_hr):
            df = safe_add_column_to_dataframe(df, 'heart_rate', aligned_hr)

        if 'heart_rate' in df.columns:
            df['heart_rate'] = pd.to_numeric(df['heart_rate'], errors='coerce')
            df['hr_smooth'] = df['heart_rate'].rolling(window=5, center=True).mean()
            print(f"Heart rate range: {df['heart_rate'].min():.0f} - {df['heart_rate'].max():.0f} bpm")
        else:
            print("Keine Heart Rate Daten gefunden!")
            df['heart_rate'] = np.nan
            df['hr_smooth'] = np.nan

        print(f"Verarbeitete FIT-Datei: {len(df)} Datenpunkte")
        print(f"Distanz: {df['cum_dist_km'].iloc[-1]:.2f} km")
        print(f"Dauer: {df['duration_hms'].iloc[-1]}")

        return df

    except Exception as e:
        # Boundary for the caller: any processing failure is reported and
        # turned into an empty result instead of propagating.
        print(f"Fehler beim Verarbeiten der FIT-Datei {file_path}: {str(e)}")
        return pd.DataFrame()


def safe_add_column_to_dataframe(df, column_name, values):
    """Add ``values`` as column ``column_name`` even if the lengths differ.

    Surplus values are cut off, missing values are padded with ``None`` and
    a warning is printed in both cases. ``None`` or an empty ``values``
    yields a column consisting only of ``None``.

    Args:
        df: DataFrame to extend; it is modified in place.
        column_name: Name of the column to create or overwrite.
        values: Sized sequence, arbitrary iterable, or ``None``.

    Returns:
        The same DataFrame object. An empty DataFrame is returned unchanged.
    """
    if df.empty:
        return df

    if values is None:
        values = []
    elif not hasattr(values, '__len__'):
        # Generators and other unsized iterables: materialize them so their
        # length is known; anything non-iterable counts as "no values".
        try:
            values = list(values)
        except TypeError:
            values = []

    df_len = len(df)
    values_len = len(values)

    if values_len == df_len:
        df[column_name] = values
    elif values_len > df_len:
        print(f"WARNUNG: {column_name} hat {values_len} Werte, DataFrame hat {df_len} Zeilen. Kürze Werte.")
        df[column_name] = values[:df_len]
    elif values_len > 0:
        print(f"WARNUNG: {column_name} hat {values_len} Werte, DataFrame hat {df_len} Zeilen. Fülle mit NaN auf.")
        df[column_name] = list(values) + [None] * (df_len - values_len)
    else:
        print(f"WARNUNG: Keine Werte für {column_name}. Fülle mit NaN.")
        df[column_name] = [None] * df_len

    return df


# =============================================================================
# INFO BANNER
# =============================================================================