diff --git a/omniduct/__init__.py b/omniduct/__init__.py index fc1a3d6..8d8c79f 100644 --- a/omniduct/__init__.py +++ b/omniduct/__init__.py @@ -1,3 +1,5 @@ +# flake8: noqa + from omniduct.duct import Duct from omniduct.registry import DuctRegistry from . import protocols diff --git a/omniduct/caches/base.py b/omniduct/caches/base.py index 17c46d0..1721a45 100644 --- a/omniduct/caches/base.py +++ b/omniduct/caches/base.py @@ -8,10 +8,9 @@ from omniduct.duct import Duct from omniduct.utils.config import config +from omniduct.utils.debug import logger from omniduct.utils.docs import quirk_docs -from ..utils.debug import logger - config.register('cache_fail_hard', description='Raise exception if cache fails to save.', default=False) diff --git a/omniduct/caches/local.py b/omniduct/caches/local.py index 6bfcac7..20cad32 100644 --- a/omniduct/caches/local.py +++ b/omniduct/caches/local.py @@ -6,8 +6,9 @@ import six -from ..utils.storage import ensure_path_exists -from ..utils.debug import logger +from omniduct.utils.debug import logger +from omniduct.utils.storage import ensure_path_exists + from .base import Cache @@ -51,7 +52,7 @@ def get_hash(cls, id_str): Returns: str: The sha1 hash of the id_str. """ - if sys.version_info.major == 3 or sys.version_info.major == 2 and isinstance(id_str, unicode): + if sys.version_info.major == 3 or sys.version_info.major == 2 and isinstance(id_str, unicode): # noqa: F821 id_str = id_str.encode('utf8') return hashlib.sha1(id_str).hexdigest() diff --git a/omniduct/databases/base.py b/omniduct/databases/base.py index bb3b7f0..0f0b48f 100644 --- a/omniduct/databases/base.py +++ b/omniduct/databases/base.py @@ -9,14 +9,17 @@ import sqlparse from decorator import decorator +from IPython import get_ipython from jinja2 import StrictUndefined, Template -from . import cursor_formatters from omniduct.caches.base import cached_method from omniduct.duct import Duct from omniduct.utils.debug import logger, logging_scope from omniduct.utils.docs import quirk_docs -from omniduct.utils.magics import MagicsProvider, process_line_arguments, process_line_cell_arguments +from omniduct.utils.magics import (MagicsProvider, process_line_arguments, + process_line_cell_arguments) + +from . import cursor_formatters logging.getLogger('requests').setLevel(logging.WARNING) @@ -156,7 +159,7 @@ def statement_hash(cls, statement, cleanup=True): """ if cleanup: statement = cls.statement_cleanup(statement) - if sys.version_info.major == 3 or sys.version_info.major == 2 and isinstance(statement, unicode): + if sys.version_info.major == 3 or sys.version_info.major == 2 and isinstance(statement, unicode): # noqa: F821 statement = statement.encode('utf8') return hashlib.sha256(statement).hexdigest() @@ -654,12 +657,12 @@ def query_magic(*args, **kwargs): @register_line_cell_magic("{}.{}".format(base_name, 'execute')) @process_line_cell_arguments - def query_magic(*args, **kwargs): + def execute_magic(*args, **kwargs): return statement_executor_magic('execute', *args, **kwargs) @register_line_cell_magic("{}.{}".format(base_name, 'stream')) @process_line_cell_arguments - def query_magic(*args, **kwargs): + def stream_magic(*args, **kwargs): return statement_executor_magic('stream', *args, **kwargs) @register_cell_magic("{}.{}".format(base_name, 'template')) diff --git a/omniduct/databases/cursor_formatters.py b/omniduct/databases/cursor_formatters.py index 407a856..d512c54 100644 --- a/omniduct/databases/cursor_formatters.py +++ b/omniduct/databases/cursor_formatters.py @@ -1,9 +1,11 @@ import csv import io import pickle +from distutils.version import LooseVersion import pandas as pd -from distutils.version import LooseVersion + +from omniduct.utils.debug import logger class CursorFormatter(object): diff --git a/omniduct/databases/hiveserver2.py b/omniduct/databases/hiveserver2.py index dbe68e2..a430214 100644 --- a/omniduct/databases/hiveserver2.py +++ b/omniduct/databases/hiveserver2.py @@ -1,7 +1,6 @@ from __future__ import absolute_import import json -import logging import os import re import shutil @@ -11,7 +10,6 @@ import pandas as pd from jinja2 import Template -from omniduct.utils.config import config from omniduct.utils.debug import logger from omniduct.utils.processes import Timeout, run_in_subprocess @@ -139,7 +137,7 @@ def _execute(self, statement, cursor=None, wait=True, poll_interval=1): log_offset = 0 if self.driver == 'pyhive': - from TCLIService.ttypes import TOperationState + from TCLIService.ttypes import TOperationState # noqa: F821 cursor.execute(statement, **{'async': True}) if wait: @@ -166,6 +164,7 @@ def _cursor_empty(self, cursor): return False def _cursor_wait(self, cursor, poll_interval=1): + from TCLIService.ttypes import TOperationState # noqa: F821 status = cursor.poll().operationState while status in (TOperationState.INITIALIZED_STATE, TOperationState.RUNNING_STATE): time.sleep(poll_interval) diff --git a/omniduct/databases/presto.py b/omniduct/databases/presto.py index 1249a4d..851c7e9 100644 --- a/omniduct/databases/presto.py +++ b/omniduct/databases/presto.py @@ -9,9 +9,9 @@ import six from future.utils import raise_with_traceback +from omniduct._version import __version__ from omniduct.utils.debug import logger -from .._version import __version__ from .base import DatabaseClient from .schemas import SchemasMixin diff --git a/omniduct/databases/schemas.py b/omniduct/databases/schemas.py index 80c57dd..5986ae1 100644 --- a/omniduct/databases/schemas.py +++ b/omniduct/databases/schemas.py @@ -1,28 +1,27 @@ from __future__ import absolute_import -import logging import pandas as pd -from sqlalchemy import (ARRAY, Boolean, Column, Float, Integer, MetaData, - String, Table, inspect, types) +import sqlalchemy +from sqlalchemy import Table +from sqlalchemy import types as sql_types from omniduct.utils.debug import logger - try: from pyhive.sqlalchemy_presto import PrestoDialect def get_columns(self, connection, table_name, schema=None, **kw): # Extend types supported by PrestoDialect as defined in PyHive type_map = { - 'bigint': types.BigInteger, - 'integer': types.Integer, - 'boolean': types.Boolean, - 'double': types.Float, - 'varchar': types.String, - 'timestamp': types.TIMESTAMP, - 'date': types.DATE, - 'array': ARRAY(Integer), - 'array': ARRAY(String) + 'bigint': sql_types.BigInteger, + 'integer': sql_types.Integer, + 'boolean': sql_types.Boolean, + 'double': sql_types.Float, + 'varchar': sql_types.String, + 'timestamp': sql_types.TIMESTAMP, + 'date': sql_types.DATE, + 'array': sql_types.ARRAY(sql_types.Integer), + 'array': sql_types.ARRAY(sql_types.String) } rows = self._get_table_columns(connection, table_name, schema) @@ -32,7 +31,7 @@ def get_columns(self, connection, table_name, schema=None, **kw): coltype = type_map[row.Type] except KeyError: logger.warn("Did not recognize type '%s' of column '%s'" % (row.Type, row.Column)) - coltype = types.NullType + coltype = sql_types.NullType result.append({ 'name': row.Column, 'type': coltype, @@ -93,7 +92,7 @@ class Schemas(object): def __init__(self, metadata): self._metadata = metadata - self._schema_names = inspect(self._metadata.bind).get_schema_names() + self._schema_names = sqlalchemy.inspect(self._metadata.bind).get_schema_names() self._schema_cache = {} def __dir__(self): @@ -125,7 +124,7 @@ class Schema(object): def __init__(self, metadata, schema): self._metadata = metadata self._schema = schema - self._table_names = inspect(self._metadata.bind).get_table_names(schema) + self._table_names = sqlalchemy.inspect(self._metadata.bind).get_table_names(schema) self._table_cache = {} def __dir__(self): diff --git a/omniduct/duct.py b/omniduct/duct.py index e74dd5f..3038afd 100644 --- a/omniduct/duct.py +++ b/omniduct/duct.py @@ -1,5 +1,4 @@ import atexit -import decorator import functools import getpass import inspect @@ -11,14 +10,15 @@ from builtins import input from enum import Enum +import decorator import six from future.utils import raise_with_traceback, with_metaclass -from omniduct.errors import DuctServerUnreachable, DuctProtocolUnknown +from omniduct.errors import DuctProtocolUnknown, DuctServerUnreachable from omniduct.utils.debug import logger, logging_scope from omniduct.utils.dependencies import check_dependencies from omniduct.utils.docs import quirk_docs -from omniduct.utils.ports import naive_load_balancer, is_port_bound +from omniduct.utils.ports import is_port_bound, naive_load_balancer class ProtocolRegisteringABCMeta(ABCMeta): diff --git a/omniduct/filesystems/base.py b/omniduct/filesystems/base.py index 45e5d48..8058cd4 100644 --- a/omniduct/filesystems/base.py +++ b/omniduct/filesystems/base.py @@ -1,8 +1,9 @@ import io from abc import abstractmethod -from collections import namedtuple, OrderedDict +from collections import OrderedDict, namedtuple import pandas as pd + from omniduct.duct import Duct from omniduct.utils.docs import quirk_docs from omniduct.utils.magics import MagicsProvider, process_line_arguments diff --git a/omniduct/filesystems/webhdfs_helpers.py b/omniduct/filesystems/webhdfs_helpers.py index b70ad99..d272330 100644 --- a/omniduct/filesystems/webhdfs_helpers.py +++ b/omniduct/filesystems/webhdfs_helpers.py @@ -2,12 +2,12 @@ import xml.dom.minidom import requests +from six.moves import http_client + from pywebhdfs import errors from pywebhdfs.webhdfs import (PyWebHdfsClient, _is_standby_exception, _move_active_host_to_head) -from six.moves import http_client - class OmniductPyWebHdfsClient(PyWebHdfsClient): diff --git a/omniduct/protocols.py b/omniduct/protocols.py index c0b4638..cf367c5 100644 --- a/omniduct/protocols.py +++ b/omniduct/protocols.py @@ -1,9 +1,11 @@ +# flake8: noqa + from .caches.local import LocalCache +from .databases.druid import DruidClient from .databases.hiveserver2 import HiveServer2Client +from .databases.neo4j import Neo4jClient from .databases.presto import PrestoClient from .databases.sqlalchemy import SQLAlchemyClient -from .databases.neo4j import Neo4jClient -from .databases.druid import DruidClient from .filesystems.local import LocalFsClient from .filesystems.s3 import S3Client from .filesystems.webhdfs import WebHdfsClient diff --git a/omniduct/registry.py b/omniduct/registry.py index 4660ef5..73c8dd2 100644 --- a/omniduct/registry.py +++ b/omniduct/registry.py @@ -2,10 +2,10 @@ import yaml from omniduct.duct import Duct +from omniduct.errors import DuctProtocolUnknown +from omniduct.utils.debug import logger from omniduct.utils.magics import MagicsProvider from omniduct.utils.proxies import NestedDictObjectProxy -from omniduct.utils.debug import logger -from omniduct.errors import DuctProtocolUnknown class DuctRegistry(object): diff --git a/omniduct/remotes/ssh.py b/omniduct/remotes/ssh.py index 74abb00..444ba20 100644 --- a/omniduct/remotes/ssh.py +++ b/omniduct/remotes/ssh.py @@ -7,18 +7,19 @@ from builtins import input from io import open -try: # Python 3 - from shlex import quote as escape_path -except ImportError: # Python 2.7 - from pipes import quote as escape_path - import pandas as pd +from omniduct.errors import DuctAuthenticationError from omniduct.filesystems.base import FileSystemFileDesc from omniduct.remotes.base import RemoteClient from omniduct.utils.debug import logger from omniduct.utils.processes import run_in_subprocess -from omniduct.errors import DuctAuthenticationError + +try: # Python 3 + from shlex import quote as escape_path +except ImportError: # Python 2.7 + from pipes import quote as escape_path + SSH_ASKPASS = '{omniduct_dir}/utils/ssh_askpass'.format(omniduct_dir=os.path.dirname(__file__)) SESSION_SSH_USERNAME = None diff --git a/omniduct/session.py b/omniduct/session.py index ef03e2b..9a11472 100644 --- a/omniduct/session.py +++ b/omniduct/session.py @@ -6,13 +6,15 @@ from .utils.config import config +__all__ = ['config', 'registry'] + + # The default console width is too wide for most notebooks, leading to ugly logging message / progress bars. We set this # to a more reasonable value for Jupyter Notebooks. ip = IPython.get_ipython() if ip and ip.__class__.__name__ == "ZMQInteractiveShell": os.environ['COLUMNS'] = "80" -__all__ = ['config', 'registry', '__author__', '__author_email__', '__version__'] OMNIDUCT_CONFIG = os.environ.get('OMNIDUCT_CONFIG', None) or os.path.expanduser('~/.omniduct/config') diff --git a/omniduct/utils/debug.py b/omniduct/utils/debug.py index a40f28a..335cf30 100644 --- a/omniduct/utils/debug.py +++ b/omniduct/utils/debug.py @@ -4,7 +4,6 @@ import logging import sys import time -import types import progressbar import six diff --git a/omniduct/utils/proxies.py b/omniduct/utils/proxies.py index c4967e2..cb729e7 100644 --- a/omniduct/utils/proxies.py +++ b/omniduct/utils/proxies.py @@ -30,7 +30,6 @@ def add_nested(tree, key, value): def flat_to_nested(tree): out = {} - children = {} for k, v in tree.items(): add_nested(out, k, v) return out diff --git a/omniduct/utils/submodules.py b/omniduct/utils/submodules.py index 4849e02..2c0c3dd 100644 --- a/omniduct/utils/submodules.py +++ b/omniduct/utils/submodules.py @@ -1,6 +1,6 @@ -import sys import importlib import pkgutil +import sys def import_submodules(package_name): diff --git a/setup.cfg b/setup.cfg index 8d1d45a..b17e62e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [bdist_wheel] universal=1 -[pycodestyle] -ignore = E501,E712,E722,W503,W504 +[flake8] +ignore = E501,E712,E722,W503,W504,W601,W606 max-line-length = 160 diff --git a/tox.ini b/tox.ini index b1eb7fb..28d6aff 100644 --- a/tox.ini +++ b/tox.ini @@ -7,10 +7,10 @@ envlist = [testenv] deps= mock - pycodestyle + flake8 pyfakefs pytest requests commands= pytest tests - pycodestyle --ignore=E501,E712,E722,W503,W504,W601,W606 omniduct tests + flake8 omniduct tests