Source code for pylib.data

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Read and write data to or from file and manipulate data
structures.

:Date: 2019-12-28

.. module:: data
  :platform: *nix, Windows
  :synopsis: Handle data files and structures.

.. moduleauthor:: Daniel Weschke <daniel.weschke@directbox.de>
"""
import math
import pickle
import collections
import re

def issequence(obj):
    """Check whether an object is a sequence.

    True for tuple, list, str (and every other object that is an instance
    of :class:`collections.abc.Sequence`, e.g. bytes or range).
    False for int, dict, set.

    :param obj: object to check
    :type obj: object

    :returns: True if obj is a sequence, False otherwise
    :rtype: bool

    :Example:

    >>> issequence(())
    True
    >>> issequence((3, ))
    True
    >>> issequence([])
    True
    >>> issequence([1])
    True
    >>> issequence([1, 2])
    True
    >>> issequence('')
    True

    >>> issequence((3))
    False
    >>> issequence({})
    False
    >>> issequence(set())
    False
    """
    # Import the submodule explicitly: a plain ``import collections`` does
    # not guarantee that the ``collections.abc`` submodule has been loaded.
    from collections.abc import Sequence

    return isinstance(obj, Sequence)
def read(file_name):
    """Return the complete content of an ascii data file.

    :param file_name: file to read
    :type file_name: str

    :returns: file content
    :rtype: str
    """
    # the context manager closes the file even if reading fails
    with open(file_name) as handle:
        return handle.read()
def read_columns(file_name, x_column, y_column, default=None, verbose=False):
    """Read two numeric columns from a whitespace separated ascii data file.

    Every non-blank row is split on whitespace and the selected fields are
    converted to float. Blank rows (e.g. a trailing empty line) are skipped.

    :param file_name: file to read
    :type file_name: str
    :param x_column: column index for the x data (first column is 0)
    :type x_column: int
    :param y_column: column index for the y data (first column is 0)
    :type y_column: int
    :param default: return object if data loading fails
    :type default: object
    :param verbose: verbose information (default = False)
    :type verbose: bool

    :returns: x and y; ``(default, default)`` if the file could not be
      opened
    :rtype: tuple(list, list)

    :raises ValueError: if a selected field is not a number
    :raises IndexError: if a row has fewer fields than the requested
      column index
    """
    x = default
    y = default

    if verbose:
        print('check if data is available')
    try:
        # the context manager closes the file even if a row fails to parse
        with open(file_name) as file:
            x = []
            y = []
            for row in file:
                fields = row.split()
                if not fields:
                    # blank row, nothing to convert
                    continue
                x.append(float(fields[x_column]))
                y.append(float(fields[y_column]))
    except OSError:  # IOError is an alias of OSError
        if verbose:
            print('data file not found')
    return x, y
def write(file_name, data):
    """Write a string to an ascii file, replacing any existing content.

    :param file_name: file to write
    :type file_name: str
    :param data: data to write
    :type data: str
    """
    # mode 'w' truncates an existing file before writing
    with open(file_name, 'w') as target:
        target.write(data)
def load(file_name, default=None, verbose=False):
    """Load a stored program object from a binary (pickle) file.

    Only the first pickled object of the file is read; one load is needed
    for every dump to get all the data.

    :param file_name: file to load
    :type file_name: str
    :param default: return object if the file cannot be opened
    :type default: object
    :param verbose: verbose information (default = False)
    :type verbose: bool

    :returns: loaded data, or ``default`` if the file cannot be opened
    :rtype: object
    """
    # NOTE(review): pickle can execute arbitrary code while loading; only
    # use this on files from a trusted source.
    if verbose:
        print('check if data is available')
    try:
        with open(file_name, 'rb') as stream:
            loaded = pickle.load(stream)
            if verbose:
                print('found:')
                print(loaded)
            return loaded
    except IOError:
        if verbose:
            print('no saved datas found')
        return default
def store(file_name, object_data):
    """Store a program object to a binary (pickle) file.

    The file is overwritten. Every dump needs a matching load to read the
    data back.

    :param file_name: file to store
    :type file_name: str
    :param object_data: data to store
    :type object_data: object
    """
    with open(file_name, 'wb') as sink:
        pickle.dump(object_data, sink, protocol=pickle.HIGHEST_PROTOCOL)
def fold_list(lst, n):
    """Convert one-dimensional kx1 array (list) to two-dimensional mxn
    array. m = k / n

    :param lst: list to convert
    :type lst: list
    :param n: length of the second dimension
    :type n: int

    :returns: two-dimensional array (list of lists); None if the length
      of the list is not a multiple of n
    :rtype: list
    """
    k = len(lst)
    if k % n != 0:
        # the list cannot be split into rows of equal length
        return None
    # walk over the row start indices in steps of n
    return [lst[first:first + n] for first in range(0, k, n)]
def _decimal_places(value):
    """Return the number of decimal places in the string form of value."""
    from decimal import Decimal, InvalidOperation

    try:
        exponent = Decimal(str(value)).as_tuple().exponent
    except InvalidOperation:
        # string form is not a plain number, do not round to decimals
        return 0
    if not isinstance(exponent, int):
        # inf and nan report a letter instead of a numeric exponent
        return 0
    return max(0, -exponent)


def seq(start, stop=None, step=1):
    r"""Create an arithmetic bounded sequence.

    The sequence is one of the following;

    - empty :math:`\{\}=\emptyset`, if start and stop are the same (or if
      stop can not be reached from start with the given step direction)
    - degenerate :math:`\{a\}`, if the sequence has only one element.
    - left-close and right-open :math:`[a, b)`

    The elements are rounded to the larger number of decimal places of
    start and step to hide floating point noise. Values written in
    scientific notation (e. g. 1e-05) are handled as well.

    :param start: start of the sequence, the lower bound. If only start
      is given than it is interpreted as stop and start will be 0.
    :type start: int or float
    :param stop: stop of sequence, the upper bound.
    :type stop: int or float
    :param step: step size, the common difference (constant difference
      between consecutive terms).
    :type step: int or float

    :returns: arithmetic bounded sequence
    :rtype: list

    :raises ZeroDivisionError: if step is 0
    """
    # example of seq(4, 0, -0.4)
    # without round:
    # [4.0, 3.6, 3.2, 2.8, 2.4, 2.0, 1.5999999999999996,
    #  1.1999999999999997, 0.7999999999999998, 0.3999999999999999]
    # with round:
    # [4.0, 3.6, 3.2, 2.8, 2.4, 2.0, 1.6, 1.2, 0.8, 0.4]
    # example of seq(4, 0, -0.41)
    # without round:
    # [4.0, 3.59, 3.18, 2.77, 2.3600000000000003,
    #  1.9500000000000002, 1.54, 1.1300000000000003,
    #  0.7200000000000002, 0.31000000000000005]
    # with round:
    # [4.0, 3.59, 3.18, 2.77, 2.36, 1.95, 1.54, 1.13, 0.72, 0.31]

    if stop is None:
        # only one bound given: it is the stop, start at 0
        start, stop = 0, start

    exponent = max(_decimal_places(start), _decimal_places(step))

    # no stop because it is an open bound
    n = int(math.ceil((stop - start)/float(step)))
    # range(n) is empty for n <= 0, which yields the empty sequence
    return [round(start + step*i, exponent) for i in range(n)]
def unique_list_hashable(sequence):
    """Remove duplicates from a sequence, keeping the first occurrences
    in their original order.

    Faster than :func:`unique_list` because a set is used for the
    membership test, but the elements of the sequence must be hashable.
    unhashable types: 'list'

    :param sequence: sequence with hashable elements
    :type sequence: iterable

    :returns: elements of the sequence without duplicates
    :rtype: list
    """
    # https://stackoverflow.com/a/37163210
    seen = set()
    result = []
    for element in sequence:
        if element in seen:
            continue
        seen.add(element)
        result.append(element)
    return result
def unique_list(sequence):
    """Remove duplicates from a sequence, keeping the first occurrences
    in their original order.

    A list is used for the membership test, therefore the elements do not
    need to be hashable (e. g. lists are allowed).

    :param sequence: sequence to filter
    :type sequence: iterable

    :returns: elements of the sequence without duplicates
    :rtype: list
    """
    # https://stackoverflow.com/a/37163210
    result = []
    for element in sequence:
        # the result list doubles as the record of elements already seen
        if element not in result:
            result.append(element)
    return result
def unique_ending(ids, n=1):
    """From id list get list with unique ending.

    The ending length is increased, starting with n, until all endings
    are unique.

    :param ids: ids
    :type ids: list
    :param n: minimum chars or ints
    :type n: int

    :returns: unique ending of ids
    :rtype: list

    :raises ValueError: if the ids contain duplicates, so that no unique
      ending exists
    """
    longest = max((len(idi) for idi in ids), default=0)
    while True:
        endings = [idi[-n:] for idi in ids]
        if len(endings) == len(set(endings)):
            return endings
        if n >= longest:
            # the endings are already the complete ids, a longer ending
            # can not change the result anymore
            raise ValueError('ids are not unique, no unique ending exists')
        n += 1
def get_id(ids, uide):
    """Get full id from unique id ending.

    :param ids: ids
    :type ids: list
    :param uide: unique id ending
    :type uide: str

    :returns: full id, the first id that ends with the given ending
    :rtype: str

    :raises IndexError: if no id ends with the given ending
    """
    matches = [candidate for candidate in ids if candidate.endswith(uide)]
    # take first element, because we know it is a unique ending
    return matches[0]
def find_last(sequence, pattern):
    """Find the last occurrence of a pattern in a sequence (text).

    :param sequence: text to search in
    :type sequence: str
    :param pattern: search pattern
    :type pattern: str

    :returns: start index of the last occurrence (-1 if pattern not in
      sequence)
    :rtype: int
    """
    if not pattern:
        # keep the historical result for an empty pattern: the index of
        # the last character (-1 for an empty sequence)
        return len(sequence) - 1
    # rfind reports the start index of the last match and -1 if there is
    # none; this is also correct for patterns longer than one character
    return sequence.rfind(pattern)
def str_between(text, left, right):
    """Get text between two pattern. Text can be multi-line.

    The left and right pattern are used as regular expressions; escape
    them (e. g. with :func:`re.escape`) to search for literal text. The
    shortest non-empty text between the first left pattern and the next
    right pattern is returned.

    :param text: text to search in
    :type text: str
    :param left: left pattern
    :type left: str
    :param right: right pattern
    :type right: str

    :returns: text between the left and right pattern; an empty string if
      nothing is found or the pattern is not usable
    :rtype: str

    .. note::
      The result is taken from the first capture group, therefore the
      left pattern should not contain capture groups on its own.
    """
    try:
        match = re.search(left+'(.+?)'+right, text, re.DOTALL)
    except (re.error, TypeError):
        # invalid regular expression or non-string arguments
        return ''
    if match is None:
        return ''
    return match.group(1)
def str_to_list(string, delimiter=';\n', newline_replacement=''):
    r"""Converts a string with block information into a list.

    This function un-wraps multi-line block information into one line.

    :param string: string with block information
    :type string: str
    :param delimiter: block delimiter (default = ';\n'). This will be
      removed from the resulting list.
    :type delimiter: str
    :param newline_replacement: block lines replacement (default = '')
    :type newline_replacement: str

    :returns: list of block information
    :rtype: list

    .. note::
      Every line is left stripped. Blocks without any content are
      ignored.

    :Example:

    ::

      before (string):
        FILE_DESCRIPTION(('Open CASCADE Model'),'2;1');
        FILE_NAME('Open CASCADE Shape Model','2019-10-14T14:32:20',('Author'),(
            'Open CASCADE'),'Open CASCADE STEP processor 7.1','Open CASCADE 7.1'
          ,'Unknown');
        FILE_SCHEMA(('AUTOMOTIVE_DESIGN { 1 0 10303 214 1 1 1 1 }'));

      after (list elements one per line):
        FILE_DESCRIPTION(('Open CASCADE Model'),'2;1')
        FILE_NAME('Open CASCADE Shape Model','2019-10-14T14:32:20',('Author'),('Open CASCADE'),'Open CASCADE STEP processor 7.1','Open CASCADE 7.1','Unknown')
        FILE_SCHEMA(('AUTOMOTIVE_DESIGN { 1 0 10303 214 1 1 1 1 }'))
    """
    # for example blocks are separated by ;\n
    # un-wrap every block to a single line: left strip its rows and glue
    # them together with newline_replacement instead of \n
    unwrapped = (
        newline_replacement.join(row.lstrip() for row in block.split('\n'))
        for block in string.split(delimiter)
    )
    # keep only the lines that have some content
    return [line for line in unwrapped if line]