Source code for exatomic.exa.core.editor

# -*- coding: utf-8 -*-
# Copyright (c) 2015-2022, Exa Analytics Development Team
# Distributed under the terms of the Apache License 2.0
"""
Editor
####################################
Text-editor-like functionality for programmatically manipulating raw text input
and output data and converting this data into container objects. This class
does not behave like a fully fledged text editor but does have some basic find,
replace, insert, etc. functionality.
"""
from __future__ import print_function
import logging
import io, os, re, sys
import pandas as pd
import numpy as np
import warnings


class Editor(object):
    """
    An editor is a representation of a text file on disk that can be
    programmatically manipulated.

    Text lines are stored in memory; no files remain open. This class does
    not strive to be a fully fledged text editor, rather a base class for
    converting input and output data from text on disk to some type of
    (exatomic framework) container object (and vice versa).

    >>> template = "Hello World!\\nHello {user}"
    >>> editor = Editor(template)
    >>> editor[0]
    'Hello World!'
    >>> len(editor)
    2
    >>> del editor[0]
    >>> len(editor)
    1
    >>> editor.write(path=None, user='Alice')
    Hello Alice

    Tip:
        Editor line numbers use a 0 base index. To increase the number of
        lines displayed by the repr, increase the value of the **nprint**
        attribute.

    Warning:
        For large text with repeating strings be sure to use the
        **as_interned** argument.

    Attributes:
        name (str): Data/file/misc name
        description (str): Data/file/misc description
        meta (dict): Additional metadata as key, value pairs
        nprint (int): Number of lines to display when printing
        cursor (int): Line number position of the cursor (see :func:`~exatomic.exa.core.editor.Editor.find_next`)
    """
    _getter_prefix = 'parse'
    _fmt = '{0}: {1}\n'.format   # Format for printing lines (see __repr__)

    @property
    def log(self):
        """Logger named ``<module>.<class name>`` of the instance."""
        name = '.'.join([self.__module__, self.__class__.__name__])
        return logging.getLogger(name)

    def write(self, path=None, *args, **kwargs):
        """
        Perform formatting and write the formatted string to a file or stdout.

        Optional arguments can be used to format the editor's contents. If no
        file path is given, prints to standard output.

        Args:
            path (str): Full file path (default None, prints to stdout)
            *args: Positional arguments to format the editor with
            **kwargs: Keyword arguments to format the editor with
        """
        text = self.format(*args, **kwargs)
        if path is None:
            print(text)
        else:
            # newline="" disables newline translation on write.
            with io.open(path, 'w', newline="") as f:
                f.write(text)

    def format(self, *args, **kwargs):
        """
        Format the string representation of the editor.

        Args:
            inplace (bool): If True, overwrite editor's contents with formatted contents
            *args: Positional arguments passed to ``str.format``
            **kwargs: Keyword arguments passed to ``str.format``

        Returns:
            formatted (str): Formatted text, or None when ``inplace`` is True
        """
        inplace = kwargs.pop("inplace", False)
        formatted = str(self).format(*args, **kwargs)
        if not inplace:
            return formatted
        self._lines = formatted.splitlines()
        return None

    def head(self, n=10):
        """
        Display the top of the file.

        Args:
            n (int): Number of lines to display
        """
        # Stored lines carry no terminators, so they are joined with newlines.
        print("\n".join(self._lines[:n]))

    def tail(self, n=10):
        """
        Display the bottom of the file.

        Args:
            n (int): Number of lines to display
        """
        # ``[-0:]`` would select every line, so n == 0 is handled explicitly.
        selected = self._lines[-n:] if n else []
        print("\n".join(selected))

    @staticmethod
    def _as_line_list(lines):
        """Convert a string (split on newlines) or list/tuple into a new list of lines."""
        if isinstance(lines, str):
            return lines.split('\n')
        if isinstance(lines, (list, tuple)):
            return list(lines)
        raise TypeError(f"Unsupported type '{type(lines)}' for lines")

    def append(self, lines):
        """
        Add lines to the end of the editor.

        Args:
            lines (list or str): Line strings, or one string that is split on newlines

        Raises:
            TypeError: If ``lines`` is neither a string nor a list/tuple
        """
        self._lines = self._lines + self._as_line_list(lines)

    def prepend(self, lines):
        """
        Add lines to the beginning of the editor.

        Args:
            lines (list or str): Line strings, or one string that is split on newlines

        Raises:
            TypeError: If ``lines`` is neither a string nor a list/tuple
        """
        self._lines = self._as_line_list(lines) + self._lines

    def insert(self, lines=None):
        """
        Insert lines into the editor.

        Each key is a line number of the editor as it is before the call; the
        associated string is placed immediately before that line.

        Note:
            To insert before the first line, use :func:`~exatomic.exa.core.editor.Editor.prepend`
            (or key 0); to insert after the last line use
            :func:`~exatomic.exa.core.editor.Editor.append`.

        Args:
            lines (dict): Dictionary of lines of form (lineno, string) pairs
        """
        if not lines:
            return
        # Process keys in ascending order: every earlier insertion shifts the
        # later targets down by exactly one line, hence the running offset.
        ordered = sorted(lines.items(), key=lambda pair: pair[0])
        for shift, (lineno, text) in enumerate(ordered):
            self._lines.insert(lineno + shift, text)

    def remove_blank_lines(self):
        """Remove all blank lines (lines that are empty or contain only whitespace)."""
        self._lines = [line for line in self._lines if line.strip()]

    def delete_lines(self, lines):
        """
        Delete all lines with given line numbers.

        Args:
            lines (list): List of integers corresponding to line numbers to delete
        """
        # Deleting from the bottom up keeps the remaining indices valid,
        # whatever order (or duplication) the caller supplied.
        for lineno in sorted(set(lines), reverse=True):
            del self[lineno]

    def find(self, *strings, **kwargs):
        """
        Search the editor for lines that contain the given string(s).

        .. code-block:: Python

            string = '''word one
            word two
            three'''
            ed = Editor(string)
            ed.find('word')          # [(0, "word one"), (1, "word two")]
            ed.find('word', 'three') # {'word': [...], 'three': [(2, "three")]}

        Note:
            Reported line numbers count from ``start``; with the default
            ``start=0`` they are the editor's line numbers.

        Args:
            strings (str): Any number of strings to search for
            keys_only (bool): Only return keys
            start (int): Optional line to start searching on
            stop (int): Optional line to stop searching on

        Returns:
            results: If multiple strings searched a dictionary of string key, (line number, line) values (else just values)
        """
        start = kwargs.pop("start", 0)
        stop = kwargs.pop("stop", None)
        keys_only = kwargs.pop("keys_only", False)
        results = {string: [] for string in strings}
        if stop is None:
            stop = len(self)
        for i, line in enumerate(self[start:stop]):
            for string in strings:
                if string in line:
                    results[string].append(i if keys_only else (i, line))
        if len(strings) == 1:
            return results[strings[0]]
        return results

    def find_next(self, *strings, **kwargs):
        """
        From the editor's current cursor position find the next instance of the
        given string.

        On a hit the cursor is moved to the line after the match.

        Args:
            strings (iterable): String or strings to search for
            start (int): Optional line to search from instead of the cursor
            keys_only (bool): Only return the line number

        Returns:
            tup (tuple): Tuple of cursor position and line or None if not found

        Note:
            This function cycles the entire editor (i.e. cursor to length of
            editor to zero and back to cursor position).
        """
        start = kwargs.pop("start", None)
        keys_only = kwargs.pop("keys_only", False)
        origin = self.cursor if start is None else start
        # First pass: origin to end of editor; second pass: wrap to the top.
        for lower, upper in ((origin, len(self)), (0, origin)):
            for i in range(lower, upper):
                line = self[i]
                if any(string in line for string in strings):
                    self.cursor = i + 1
                    return i if keys_only else (i, line)
        return None

    def regex(self, *patterns, **kwargs):
        """
        Search the editor for lines matching the regular expression.
        re.MULTILINE is not currently supported.

        For a matching line, a pattern with capture groups contributes one
        ``(line number, group)`` entry per group; a pattern without groups
        contributes ``(line number, line)``.

        Note:
            Reported line numbers count from ``start``; with the default
            ``start=0`` they are the editor's line numbers.

        Args:
            patterns: Regular expressions to search each line for
            keys_only (bool): Only return keys
            flags (re.FLAG): flags passed to re.search
            start (int): Optional line to start searching on
            stop (int): Optional line to stop searching on (default: end of editor)

        Returns:
            results (dict): Dictionary of pattern keys, line values (or groups - default)
        """
        start = kwargs.pop("start", 0)
        # A missing stop means "through the last line"; slicing with None
        # includes the final line, which a -1 sentinel would drop.
        stop = kwargs.pop("stop", None)
        keys_only = kwargs.pop("keys_only", False)
        flags = kwargs.pop("flags", 0)
        results = {pattern: [] for pattern in patterns}
        compiled = [(pattern, re.compile(pattern, flags)) for pattern in patterns]
        for i, line in enumerate(self[start:stop]):
            for pattern, matcher in compiled:
                match = matcher.search(line)
                if not match:
                    continue
                if keys_only:
                    results[pattern].append(i)
                elif match.groups():
                    results[pattern].extend((i, group) for group in match.groups())
                else:
                    results[pattern].append((i, line))
        if len(patterns) == 1:
            return results[patterns[0]]
        return results

    def replace(self, pattern, replacement):
        """
        Replace all instances of a pattern with a replacement.

        Args:
            pattern (str): Pattern to replace (plain substring, not a regex)
            replacement (str): Text to insert
        """
        for i, line in enumerate(self):
            if pattern in line:
                self[i] = line.replace(pattern, replacement)

    def pandas_dataframe(self, start, stop, ncol, **kwargs):
        """
        Returns the result of whitespace-delimited pandas.read_csv on a subset
        of the file.

        Args:
            start (int): line number where structured data starts
            stop (int): line number where structured data stops
            ncol (int or list): the number of columns in the structured
                                data or a list of that length with
                                column names
            **kwargs: Additional keyword arguments passed to pandas.read_csv

        Returns:
            pd.DataFrame: structured data
        """
        if isinstance(ncol, (int, np.integer)):
            names = list(range(ncol))
        else:
            names = ncol
        text = '\n'.join(self[start:stop])
        # sep=r'\s+' is the supported spelling of the deprecated
        # delim_whitespace=True option.
        return pd.read_csv(io.StringIO(text), sep=r'\s+', names=names, **kwargs)

    def to_stream(self):
        """Create an StringIO object from the current editor text."""
        return io.StringIO(str(self))

    @property
    def variables(self):
        """
        Display a list of templatable variables present in the file.

        Templating is accomplished by creating a bracketed object in the same
        way that Python performs `string formatting`_. The editor is able to
        replace the placeholder value of the template. Integer templates are
        positional arguments. Names that appear in doubled (escaped) braces
        are excluded.

        .. _string formatting: https://docs.python.org/3.6/library/string.html
        """
        string = str(self)
        # "{{name}}" is an escaped literal; strip one brace layer so it can be
        # subtracted from the single-brace matches below.
        constants = [match[1:-1] for match in
                     re.findall(r'\{\{[A-Za-z0-9_]*\}\}', string)]
        variables = re.findall(r'\{[A-Za-z0-9_]*\}', string)
        return sorted(set(variables).difference(constants))

    @classmethod
    def from_file(cls, path, **kwargs):
        """Create an editor instance from a file on disk."""
        lines = lines_from_file(path, kwargs.get('as_interned', False),
                                kwargs.get('encoding'))
        meta = kwargs.get('meta')
        # Work on a copy so the caller's dictionary is never modified.
        meta = {'from': 'file'} if meta is None else dict(meta)
        meta['filepath'] = path
        kwargs['meta'] = meta
        return cls(lines, **kwargs)

    @classmethod
    def from_stream(cls, f, **kwargs):
        """Create an editor instance from a file stream."""
        lines = lines_from_stream(f, kwargs.get('as_interned', False))
        meta = kwargs.get('meta')
        # Work on a copy so the caller's dictionary is never modified.
        meta = {'from': 'stream'} if meta is None else dict(meta)
        meta['filepath'] = getattr(f, 'name', None)
        kwargs['meta'] = meta
        return cls(lines, **kwargs)

    @classmethod
    def from_string(cls, string, **kwargs):
        """Create an editor instance from a string template."""
        lines = lines_from_string(string, kwargs.get('as_interned', False))
        return cls(lines, **kwargs)

    def __init__(self, path_stream_or_string, as_interned=False, nprint=30,
                 name=None, description=None, meta=None, encoding=None,
                 ignore=False):
        """
        Args:
            path_stream_or_string: File path, text stream, text, or list/tuple of lines
            as_interned (bool): Intern line strings read from a path, stream or string
            nprint (int): Number of lines to display when printing
            name (str): Data/file/misc name
            description (str): Data/file/misc description
            meta (dict): Additional metadata as key, value pairs
            encoding (str): Encoding used when reading from a file path
            ignore (bool): Suppress the "possibly incorrect file path" warning

        Raises:
            TypeError: If the first argument is of an unsupported type
        """
        textobj = path_stream_or_string
        is_text = isinstance(textobj, str)
        # Strings of 32760 characters or more are never treated as paths.
        on_disk = is_text and len(textobj) < 32760 and os.path.exists(textobj)
        # Backporting file check: a single-line string that is not an existing
        # path may be a mistyped file name.
        if is_text and not ignore and "\n" not in textobj and not on_disk:
            warnings.warn("Possibly incorrect file path! {}".format(textobj))
        if on_disk:
            self._lines = lines_from_file(textobj, as_interned, encoding)
        elif isinstance(textobj, (list, tuple)):
            # Copy so that tuples become mutable and the caller's sequence is
            # not modified by later edits.
            self._lines = list(textobj)
        elif isinstance(textobj, io.TextIOBase):
            self._lines = lines_from_stream(textobj, as_interned)
        elif is_text:
            self._lines = lines_from_string(textobj, as_interned)
        else:
            raise TypeError('Unknown type for arg data: {}'.format(type(textobj)))
        self.name = name
        self.description = description
        self.meta = meta
        self.nprint = nprint
        self.cursor = 0
        self.log.debug('contains {} lines'.format(len(self._lines)))

    def __delitem__(self, line):
        del self._lines[line]     # "line" is the 0-based line number

    def __getitem__(self, key):
        # String keys give attribute access; anything else indexes the lines.
        if isinstance(key, str):
            return getattr(self, key)
        return self._lines[key]

    def __setitem__(self, line, value):
        self._lines[line] = value

    def __iter__(self):
        for line in self._lines:
            yield line

    def __len__(self):
        return len(self._lines)

    def __str__(self):
        return '\n'.join(self._lines)

    def __contains__(self, item):
        # True when ``item`` occurs inside any line (substring test).
        return any(item in line for line in self)

    def __repr__(self):
        total = len(self)
        width = len(str(total))

        def numbered(indices):
            return [self._fmt(str(i).rjust(width, ' '), self._lines[i])
                    for i in indices]

        if total > self.nprint * 2:
            # Long text: first and last ``nprint`` lines around an ellipsis.
            parts = numbered(range(self.nprint))
            parts.append('...\n'.rjust(width, ' '))
            parts.extend(numbered(range(total - self.nprint, total)))
        else:
            parts = numbered(range(total))
        return ''.join(parts)
def lines_from_file(path, as_interned=False, encoding=None):
    """
    Read a text file and return its contents as a list of lines.

    The file is read in full and closed before returning; line terminators
    are not part of the returned strings.

    Args:
        path (str): File path
        as_interned (bool): Return "interned" strings (default False)
        encoding (str): Encoding passed to ``io.open`` (default None)

    Returns:
        strings (list): File line list
    """
    with io.open(path, encoding=encoding) as handle:
        text = handle.read()
    rows = text.splitlines()
    if not as_interned:
        return rows
    return [sys.intern(row) for row in rows]
def lines_from_stream(f, as_interned=False):
    """
    Read everything from a text stream and return it as a list of lines.

    Line terminators are not part of the returned strings.

    Args:
        f (io.TextIOWrapper): File stream
        as_interned (bool): Return "interned" strings (default False)

    Returns:
        strings (list): File line list
    """
    rows = f.read().splitlines()
    if not as_interned:
        return rows
    return [sys.intern(row) for row in rows]
def lines_from_string(string, as_interned=False):
    """
    Split a string into a list of lines.

    Line terminators are not part of the returned strings.

    Args:
        string (str): File string
        as_interned (bool): Return "interned" strings (default False)

    Returns:
        strings (list): File line list
    """
    rows = string.splitlines()
    if not as_interned:
        return rows
    return [sys.intern(row) for row in rows]