Skip to content

Commit

Permalink
Initial import
Browse files Browse the repository at this point in the history
  • Loading branch information
bobbyno committed Mar 22, 2019
0 parents commit fa3bef2
Show file tree
Hide file tree
Showing 33 changed files with 577 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* text=auto
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
docs/site/

# OSX Junk
.DS_Store

# test cache
.cache/*
tests/__pycache__/*
*.pytest_cache/
83 changes: 83 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Tested Minds Data Science Template


_A logical, reasonably standardized, but flexible project structure for doing and sharing data science work._


This is a customized [cookiecutter](https://cookiecutter.readthedocs.io/en/latest/readme.html) version of the [cookiecutter-data-science](http://drivendata.github.io/cookiecutter-data-science/) project from [DrivenData](https://www.drivendata.org/).


#### [Documentation](http://drivendata.github.io/cookiecutter-data-science/)


### Requirements to use the cookiecutter template:

- Python 3.x
- [Cookiecutter Python package](http://cookiecutter.readthedocs.org/en/latest/installation.html) >= 1.4.0

``` bash
pip install cookiecutter
```


### To start a new project, run:

cookiecutter https://github.com/testedminds/data-science-template



### The resulting directory structure

The directory structure of your new project looks like this:

```
├── Makefile <- Makefile with commands like `make data` or `make train`
├── README.md <- The top-level README for developers using this project.
├── data
│ ├── external <- Data from third-party sources.
│ ├── interim <- Intermediate data that has been transformed.
│ ├── processed <- The final, canonical data sets for modeling.
│ └── raw <- The original, immutable data dump.
├── docs <- A default Sphinx project; see sphinx-doc.org for details
├── models <- Trained and serialized models, model predictions, or model summaries
├── notebooks <- Jupyter notebooks. Naming convention is a number (for ordering)
│ and a short `-` delimited description, e.g. `1.0-initial-data-exploration`.
├── references <- Data dictionaries, manuals, and all other explanatory materials.
├── reports <- Generated analysis as HTML, PDF, LaTeX, etc.
│ └── figures <- Generated graphics and figures to be used in reporting
├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g.
│ generated with `pip freeze > requirements.txt`
└── src <- Source code for use in this project.
├── __init__.py <- Makes src a Python package
├── data <- Scripts to download or generate data
│ └── make_dataset.py
├── features <- Scripts to turn raw data into features for modeling
│ └── build_features.py
├── models <- Scripts to train models and then use trained models to make
│ │ predictions
│ ├── predict_model.py
│ └── train_model.py
└── visualization <- Scripts to create exploratory and results oriented visualizations
└── visualize.py
```


### Installing development requirements

pip install -r requirements.txt


### Running the tests

py.test tests
6 changes: 6 additions & 0 deletions cookiecutter.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"project_name": "project_name",
"repo_name": "{{ cookiecutter.project_name.lower().replace(' ', '_') }}",
"author_name": "Your name (or your organization/company/team)",
"description": "A short description of the project."
}
4 changes: 4 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
mkdocs
mkdocs-cinder
cookiecutter
pytest
45 changes: 45 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import sys
import pytest
import shutil
from pathlib import Path
from cookiecutter import main

# Root of the template repository: the directory that contains the folder
# this conftest.py lives in. Passed to cookiecutter as the template location.
CCDS_ROOT = Path(__file__).parents[1].resolve()

# Context overrides used as the non-default fixture parametrization; they are
# handed to cookiecutter as ``extra_context``.
args = {
'project_name': 'TestedMinds',
'author_name': 'TestedMinds',
}


def system_check(basename):
    """Return ``basename`` adjusted for the current platform.

    When ``sys.platform`` contains ``'linux'`` the name is lower-cased;
    on every other platform it is returned unchanged.
    """
    return basename.lower() if 'linux' in sys.platform else basename


@pytest.fixture(scope='class', params=[{}, args])
def default_baked_project(tmpdir_factory, request):
    """Bake the template into a temporary directory for a test class.

    Runs once per requesting class for each parameter set: ``{}`` (template
    defaults) and ``args`` (explicit overrides). The path of the baked
    project is stored on the requesting class as ``path``.

    The output directory is removed afterwards, including when baking
    itself fails before the tests get to run.
    """
    temp = tmpdir_factory.mktemp('data-project')
    out_dir = Path(temp).resolve()

    # NOTE: this rebinds the ``param`` attribute of the pytest module so the
    # tests can read the active context via ``pytest.param.get(...)``. It
    # shadows pytest's own ``pytest.param`` helper for the rest of the run.
    pytest.param = request.param

    try:
        main.cookiecutter(
            str(CCDS_ROOT),
            no_input=True,
            extra_context=pytest.param,
            output_dir=out_dir
        )

        pn = pytest.param.get('project_name') or 'project_name'

        # project name gets converted to lower case on Linux but not Mac
        pn = system_check(pn)

        request.cls.path = out_dir / pn
        yield
    finally:
        # cleanup after the class has run, or after a failed bake
        shutil.rmtree(out_dir)
95 changes: 95 additions & 0 deletions tests/test_creation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import os
import pytest
from subprocess import check_output
from conftest import system_check, args


def no_curlies(filepath):
    """Return True when the file contains no Jinja delimiters.

    Used to check that Jinja was able to render everything: a leftover
    ``{{``, ``}}``, ``{%`` or ``%}`` means part of the template was written
    out unrendered. Single braces are not treated as template markers.

    The file is decoded as UTF-8 explicitly so the check does not depend on
    the platform's default encoding.
    """
    template_strings = ('{{', '}}', '{%', '%}')

    with open(filepath, 'r', encoding='utf-8') as f:
        data = f.read()

    return not any(marker in data for marker in template_strings)


@pytest.mark.usefixtures("default_baked_project")
class TestCookieSetup(object):
    """Checks run against a project baked by ``default_baked_project``.

    The fixture stores the baked project directory on the class as ``path``
    and exposes the active cookiecutter context as ``pytest.param``.
    """

    def _setup_py_output(self, flag):
        """Run the baked project's ``setup.py`` with ``flag``; return stripped output."""
        # Local import: this module's import header does not bring in sys.
        import sys

        # Use the interpreter that is running the tests; a bare 'python' may
        # be absent from PATH or resolve to a different interpreter. The
        # script path is passed as str so the argv list holds only strings.
        cmd = [sys.executable, str(self.path / 'setup.py'), flag]
        return check_output(cmd).decode('ascii').strip()

    def test_project_name(self):
        """The baked directory is named after the (platform-adjusted) project name."""
        project = self.path
        if pytest.param.get('project_name'):
            name = system_check(args['project_name'])
            assert project.name == name
        else:
            assert project.name == 'project_name'

    def test_author(self):
        """``setup.py --author`` reports the override, else the template default."""
        p = self._setup_py_output('--author')
        if pytest.param.get('author_name'):
            assert p == args['author_name']
        else:
            assert p == 'Your name (or your organization/company/team)'

    def test_readme(self):
        """README exists, is fully rendered, and starts with the project name."""
        readme_path = self.path / 'README.md'
        assert readme_path.exists()
        assert no_curlies(readme_path)
        if pytest.param.get('project_name'):
            # Explicit encoding so reading does not depend on the platform default.
            with open(readme_path, encoding='utf-8') as fin:
                assert next(fin).strip() == args['project_name']

    def test_setup(self):
        """``setup.py --version`` reports the template's initial version."""
        assert self._setup_py_output('--version') == '0.1.0'

    def test_requirements(self):
        """requirements.txt exists and is fully rendered."""
        reqs_path = self.path / 'requirements.txt'
        assert reqs_path.exists()
        assert no_curlies(reqs_path)

    def test_makefile(self):
        """Makefile exists and is fully rendered."""
        makefile_path = self.path / 'Makefile'
        assert makefile_path.exists()
        assert no_curlies(makefile_path)

    def test_folders(self):
        """Every expected directory (and the project root) is present."""
        expected_dirs = [
            'data',
            'data/external',
            'data/interim',
            'data/processed',
            'data/raw',
            'models',
            'notebooks',
            'references',
            'reports',
            'reports/figures',
            'src',
            'src/data',
            'src/features',
            'src/models',
            'src/visualization',
        ]

        # The project root itself must also show up in the walk.
        required = {str(self.path / d) for d in expected_dirs}
        required.add(str(self.path))

        found = {dirpath for dirpath, _, _ in os.walk(self.path)}
        missing = required - found
        assert not missing, 'missing directories: {}'.format(sorted(missing))

89 changes: 89 additions & 0 deletions {{ cookiecutter.repo_name }}/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# DotEnv configuration
.env

# Database
*.db
*.rdb

# Pycharm
.idea

# VS Code
.vscode/

# Spyder
.spyproject/

# Jupyter NB Checkpoints
.ipynb_checkpoints/

# exclude data from source control by default
/data/

# Mac OS-specific storage files
.DS_Store

# vim
*.swp
*.swo

# Mypy cache
.mypy_cache/
Loading

0 comments on commit fa3bef2

Please sign in to comment.