Initial import

testedminds · Mar 22, 2019 · fa3bef2 · fa3bef2
commit fa3bef2
Show file tree

Hide file tree

Showing 33 changed files with 577 additions and 0 deletions.
diff --git a/.gitattributes b/.gitattributes
@@ -0,0 +1 @@
+* text=auto
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,9 @@
+docs/site/
+
+# OSX Junk
+.DS_Store
+
+# test cache
+.cache/*
+tests/__pycache__/*
+*.pytest_cache/
diff --git a/README.md b/README.md
@@ -0,0 +1,83 @@
+# Tested Minds Data Science Template
+
+
+_A logical, reasonably standardized, but flexible project structure for doing and sharing data science work._
+
+
+This is a customized [cookiecutter](https://cookiecutter.readthedocs.io/en/latest/readme.html) version of the [cookiecutter-data-science](http://drivendata.github.io/cookiecutter-data-science/) project from [DrivenData](https://www.drivendata.org/).
+
+
+#### [Documentation](http://drivendata.github.io/cookiecutter-data-science/)
+
+
+### Requirements to use the cookiecutter template:
+
+- Python 3.x
+- [Cookiecutter Python package](http://cookiecutter.readthedocs.org/en/latest/installation.html) >= 1.4.0
+
+``` bash
+pip install cookiecutter
+```
+
+
+### To start a new project, run:
+
+    cookiecutter https://github.com/testedminds/data-science-template
+
+
+
+### The resulting directory structure
+
+The directory structure of your new project looks like this:
+
+```
+├── Makefile           <- Makefile with commands like `make data` or `make train`
+├── README.md          <- The top-level README for developers using this project.
+├── data
+│   ├── external       <- Data from third-party sources.
+│   ├── interim        <- Intermediate data that has been transformed.
+│   ├── processed      <- The final, canonical data sets for modeling.
+│   └── raw            <- The original, immutable data dump.
+│
+├── docs               <- A default Sphinx project; see sphinx-doc.org for details
+│
+├── models             <- Trained and serialized models, model predictions, or model summaries
+│
+├── notebooks          <- Jupyter notebooks. Naming convention is a number (for ordering)
+│                         and a short `-` delimited description, e.g. `1.0-initial-data-exploration`.
+│
+├── references         <- Data dictionaries, manuals, and all other explanatory materials.
+│
+├── reports            <- Generated analysis as HTML, PDF, LaTeX, etc.
+│   └── figures        <- Generated graphics and figures to be used in reporting
+│
+├── requirements.txt   <- The requirements file for reproducing the analysis environment, e.g.
+│                         generated with `pip freeze > requirements.txt`
+│
+└── src                <- Source code for use in this project.
+    ├── __init__.py    <- Makes src a Python package
+    │
+    ├── data           <- Scripts to download or generate data
+    │   └── make_dataset.py
+    │
+    ├── features       <- Scripts to turn raw data into features for modeling
+    │   └── build_features.py
+    │
+    ├── models         <- Scripts to train models and then use trained models to make
+    │   │                 predictions
+    │   ├── predict_model.py
+    │   └── train_model.py
+    │
+    └── visualization  <- Scripts to create exploratory and results-oriented visualizations
+        └── visualize.py
+```
+
+
+### Installing development requirements
+
+    pip install -r requirements.txt
+
+
+### Running the tests
+
+    py.test tests
diff --git a/cookiecutter.json b/cookiecutter.json
@@ -0,0 +1,6 @@
+{
+    "project_name": "project_name",
+    "repo_name": "{{ cookiecutter.project_name.lower().replace(' ', '_') }}",
+    "author_name": "Your name (or your organization/company/team)",
+    "description": "A short description of the project."
+}
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,4 @@
+mkdocs
+mkdocs-cinder
+cookiecutter
+pytest
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -0,0 +1,45 @@
+import sys
+import pytest
+import shutil
+from pathlib import Path
+from cookiecutter import main
+
+# Directory one level above the one holding this conftest file; it is handed
+# to cookiecutter as the template location.
+CCDS_ROOT = Path(__file__).parents[1].resolve()
+
+# Non-default template context. The default_baked_project fixture is
+# parametrized with an empty context and with these overrides.
+args = {
+        'project_name': 'TestedMinds',
+        'author_name': 'TestedMinds',
+        }
+
+
+def system_check(basename):
+    """Return ``basename`` adjusted for the current platform.
+
+    The name is lower-cased when ``sys.platform`` contains ``'linux'`` and
+    returned unchanged on every other platform.
+    """
+    return basename.lower() if 'linux' in sys.platform else basename
+
+
+@pytest.fixture(scope='class', params=[{}, args])
+def default_baked_project(tmpdir_factory, request):
+    """Bake the template into a temporary directory for one test class.
+
+    Runs cookiecutter against ``CCDS_ROOT`` with the parametrized context,
+    stores the expected project directory on the requesting class as
+    ``path``, and deletes the output directory once the class has finished.
+    """
+    output_root = Path(tmpdir_factory.mktemp('data-project')).resolve()
+
+    # The active context is published on the pytest module so that the tests
+    # can branch on it.
+    # NOTE(review): this rebinds the ``param`` attribute of the pytest module,
+    # which is also the name of pytest's own ``pytest.param`` helper - confirm
+    # nothing in the session needs the original.
+    context = request.param
+    pytest.param = context
+    main.cookiecutter(
+        str(CCDS_ROOT),
+        no_input=True,
+        extra_context=context,
+        output_dir=output_root,
+    )
+
+    # project name gets converted to lower case on Linux but not Mac
+    project_dir = system_check(context.get('project_name') or 'project_name')
+    request.cls.path = output_root / project_dir
+
+    yield
+
+    # teardown: remove everything that was baked
+    shutil.rmtree(output_root)
diff --git a/tests/test_creation.py b/tests/test_creation.py
@@ -0,0 +1,95 @@
+import os
+import sys
+from subprocess import check_output
+
+import pytest
+
+from conftest import system_check, args
+
+
+def no_curlies(filepath):
+    """Return True when the file contains no Jinja template delimiters.
+
+    The file is read as text and searched for ``{{``, ``}}``, ``{%`` and
+    ``%}``. Finding any of them means jinja did not render everything.
+    """
+    with open(filepath, 'r') as handle:
+        contents = handle.read()
+
+    for delimiter in ('{{', '}}', '{%', '%}'):
+        if delimiter in contents:
+            return False
+    return True
+
+
+@pytest.mark.usefixtures("default_baked_project")
+class TestCookieSetup(object):
+    """Checks run against the project baked by ``default_baked_project``.
+
+    The tests read the baked project directory from ``self.path`` and the
+    cookiecutter context in use from ``pytest.param``.
+    """
+
+    def _setup_output(self, option):
+        """Run the baked ``setup.py`` with ``option`` and return its output.
+
+        The output is decoded as ASCII and stripped of surrounding whitespace.
+        """
+        # sys.executable is the interpreter running the tests; a bare
+        # ``python`` may resolve to a different interpreter or to none.
+        # str() keeps the argument list to plain strings.
+        command = [sys.executable, str(self.path / 'setup.py'), option]
+        return check_output(command).decode('ascii').strip()
+
+    def test_project_name(self):
+        """The baked directory is named after the (platform-adjusted) project."""
+        project = self.path
+        if pytest.param.get('project_name'):
+            name = system_check(args['project_name'])
+            assert project.name == name
+        else:
+            assert project.name == 'project_name'
+
+    def test_author(self):
+        """``setup.py --author`` reports the configured or default author."""
+        author = self._setup_output('--author')
+        if pytest.param.get('author_name'):
+            assert author == args['author_name']
+        else:
+            assert author == 'Your name (or your organization/company/team)'
+
+    def test_readme(self):
+        """README.md exists, is fully rendered and starts with the project name."""
+        readme_path = self.path / 'README.md'
+        assert readme_path.exists()
+        assert no_curlies(readme_path)
+        if pytest.param.get('project_name'):
+            with open(readme_path) as fin:
+                assert next(fin).strip() == args['project_name']
+
+    def test_setup(self):
+        """``setup.py --version`` reports the initial version."""
+        assert self._setup_output('--version') == '0.1.0'
+
+    def test_requirements(self):
+        """requirements.txt exists and contains no unrendered template markup."""
+        reqs_path = self.path / 'requirements.txt'
+        assert reqs_path.exists()
+        assert no_curlies(reqs_path)
+
+    def test_makefile(self):
+        """Makefile exists and contains no unrendered template markup."""
+        makefile_path = self.path / 'Makefile'
+        assert makefile_path.exists()
+        assert no_curlies(makefile_path)
+
+    def test_folders(self):
+        """Every expected directory is present in the baked project."""
+        expected_dirs = [
+            'data',
+            'data/external',
+            'data/interim',
+            'data/processed',
+            'data/raw',
+            'models',
+            'notebooks',
+            'references',
+            'reports',
+            'reports/figures',
+            'src',
+            'src/data',
+            'src/features',
+            'src/models',
+            'src/visualization',
+        ]
+
+        # The project root itself must be part of the walk as well.
+        expected = {str(self.path / d) for d in expected_dirs}
+        expected.add(str(self.path))
+
+        # Collect the walked directories directly; an empty walk then shows up
+        # as missing directories rather than as an unpacking error.
+        found = {root for root, _, _ in os.walk(self.path)}
+        missing = expected - found
+        assert not missing, 'missing directories: {}'.format(sorted(missing))
+
diff --git a/{{ cookiecutter.repo_name }}/.gitignore b/{{ cookiecutter.repo_name }}/.gitignore
@@ -0,0 +1,89 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# DotEnv configuration
+.env
+
+# Database
+*.db
+*.rdb
+
+# Pycharm
+.idea
+
+# VS Code
+.vscode/
+
+# Spyder
+.spyproject/
+
+# Jupyter NB Checkpoints
+.ipynb_checkpoints/
+
+# exclude data from source control by default
+/data/
+
+# Mac OS-specific storage files
+.DS_Store
+
+# vim
+*.swp
+*.swo
+
+# Mypy cache
+.mypy_cache/