Skip to content

Commit 2201745

Browse files
Improve tooling and backend for creating and working with knowledge posts (airbnb#308)
1 parent 72e91ec commit 2201745

File tree

9 files changed

+390
-17
lines changed

9 files changed

+390
-17
lines changed

knowledge_repo/config.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import os
55
import time
66
import types
7+
import yaml
78

89
logger = logging.getLogger(__name__)
910

@@ -89,7 +90,7 @@ def __set_from_file(self, d, filename, force=False):
8990
self.__set_from_module(d, config, force)
9091
elif filename.endswith('.yml'):
9192
with open(filename) as f:
92-
config = yaml.load(f)
93+
config = yaml.safe_load(f)
9394
self.update(config)
9495

9596
def __set_from_module(self, d, module, force=False):

knowledge_repo/converters/pkp.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
import zipfile
22
import io
3+
import os
34

45
from ..converter import KnowledgePostConverter
56

67

7-
class IpynbFormat(KnowledgePostConverter):
8+
class PkpConverter(KnowledgePostConverter):
89
_registry_keys = ['kp', 'zip']
910

1011
def to_file(self, filename):

knowledge_repo/repositories/folder.py

+226
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
from __future__ import print_function
2+
from builtins import input
3+
4+
import os
5+
import shutil
6+
import logging
7+
import re
8+
import git
9+
import socket
10+
import time
11+
from io import open
12+
13+
from knowledge_repo._version import __git_uri__
14+
from ..post import KnowledgePost
15+
from ..repository import KnowledgeRepository
16+
from ..utils.exec_code import get_module_for_source
17+
from ..utils.types import str_types
18+
from ..utils.encoding import encode
19+
20+
logger = logging.getLogger(__name__)
21+
22+
23+
class FolderKnowledgeRepository(KnowledgeRepository):
24+
_registry_keys = ['', 'file']
25+
26+
TEMPLATES = {
27+
'README.md': os.path.abspath(os.path.join(os.path.dirname(__file__), '../templates', 'repository_readme.md')),
28+
'.knowledge_repo_config.yml': os.path.abspath(os.path.join(os.path.dirname(__file__), '../templates', 'repository_config.yml'))
29+
}
30+
31+
@classmethod
def create(cls, uri, embed_tooling=False):
    """
    Create a folder-backed knowledge repository at `uri`.

    A leading 'file://' is stripped from `uri`, the folder is created if it
    does not exist, and every entry of `cls.TEMPLATES` is copied into it
    unless a file of that name is already present (a warning is logged
    instead). `embed_tooling` is accepted but not used by this method.

    Returns a `FolderKnowledgeRepository` for the absolute folder path.
    """
    scheme = 'file://'
    location = uri[len(scheme):] if uri.startswith(scheme) else uri
    path = os.path.abspath(location)
    if not os.path.exists(path):
        os.makedirs(path)

    # Seed README and configuration templates without clobbering existing files
    for filename in cls.TEMPLATES:
        destination = os.path.join(path, filename)
        if os.path.exists(destination):
            logger.warning("Not overriding existing file '{}'.".format(filename))
            continue
        shutil.copy(cls.TEMPLATES[filename], destination)
    return FolderKnowledgeRepository(path)
47+
48+
@classmethod
def from_uri(cls, uri, *args, **kwargs):
    """
    Build the repository object for `uri`.

    A 'file://' prefix explicitly requests a folder repository: the prefix
    is stripped and `cls` is instantiated without looking for git metadata.
    Otherwise, if `uri` contains a '.git' entry, a `GitKnowledgeRepository`
    is returned instead; failing that, `cls` is instantiated with `uri`.
    """
    explicit_scheme = 'file://'
    if uri.startswith(explicit_scheme):
        # Explicit folder request: never fall through to the git check
        return cls(uri[len(explicit_scheme):], *args, **kwargs)

    if os.path.exists(os.path.join(uri, '.git')):
        # Imported lazily, only when a git checkout is actually detected
        from .gitrepository import GitKnowledgeRepository
        return GitKnowledgeRepository(uri, *args, **kwargs)

    return cls(uri, *args, **kwargs)
63+
64+
def init(self, config='.knowledge_repo_config.yml', auto_create=False):
    """
    Initialise the repository from `self.uri`.

    Stores `auto_create`, assigns `self.uri` to `self.path`, then passes
    the location of the `config` file inside the repository folder to
    `self.config.update`.
    """
    # auto_create is stored first because the path setter reads it
    self.auto_create = auto_create
    self.path = self.uri
    config_file = os.path.join(self.path, config)
    self.config.update(config_file)
68+
69+
@property
def path(self):
    """
    The repository folder path, as stored by the setter.
    """
    return self._path


@path.setter
def path(self, path):
    """
    Expand '~' in `path`, make it absolute and store it.

    If the resulting folder does not exist it is created through
    `self.create` when `self.auto_create` is set; otherwise a `ValueError`
    is raised.
    """
    assert isinstance(path, str), "The path specified must be a string."
    resolved = os.path.abspath(os.path.expanduser(path))
    if not os.path.exists(resolved):
        if not self.auto_create:
            raise ValueError("Provided path '{}' does not exist.".format(resolved))
        self.create(resolved)
    self._path = resolved
84+
85+
# ----------- Repository actions / state ------------------------------------
86+
@property
87+
def revision(self):
88+
return time.time()
89+
90+
@property
91+
def status(self):
92+
return 'OK'
93+
94+
@property
95+
def status_message(self):
96+
return 'OK'
97+
98+
# ---------------- Post retrieval methods --------------------------------
99+
100+
def _dir(self, prefix, statuses):
101+
posts = set()
102+
103+
if self.PostStatus.PUBLISHED in statuses:
104+
105+
for path, folders, files in os.walk(os.path.join(self.path, prefix or '')):
106+
107+
# Do not visit hidden folders
108+
for folder in folders:
109+
if folder.startswith('.'):
110+
folders.remove(folder)
111+
112+
posts.update(
113+
os.path.join(os.path.relpath(path, start=self.path), folder)
114+
for folder in folders if folder.endswith('.kp')
115+
)
116+
posts.update(
117+
os.path.join(os.path.relpath(path, start=self.path), file)
118+
for file in files if file.endswith('.kp')
119+
)
120+
121+
for post in sorted([post[2:] if post.startswith('./') else post for post in posts]):
122+
yield post
123+
124+
# ------------- Post submission / addition user flow ----------------------
125+
def _add_prepare(self, kp, path, update=False, **kwargs):
126+
pass
127+
128+
def _add_cleanup(self, kp, path, update=False, **kwargs):
129+
pass
130+
131+
def _submit(self, path=None, branch=None, force=False):
132+
pass # Added posts are already submitted
133+
134+
def _publish(self, path): # Publish a post for general perusal
135+
pass # Added posts are already published
136+
137+
def _unpublish(self, path): # unpublish a post for general perusal
138+
raise NotImplementedError
139+
140+
def _accept(self, path): # Approve to publish a post for general perusal
141+
pass
142+
143+
def _remove(self, path, all=False):
144+
shutil.rmtree(os.path.join(self.path, path))
145+
146+
# ------------ Knowledge Post Data Retrieval Methods -------------------------
147+
148+
def _kp_uuid(self, path):
149+
try:
150+
return self._kp_read_ref(path, 'UUID')
151+
except:
152+
return None
153+
154+
def _kp_path(self, path, rel=None):
155+
return KnowledgeRepository._kp_path(self, os.path.expanduser(path), rel=rel or self.path)
156+
157+
def _kp_exists(self, path, revision=None):
158+
return os.path.exists(os.path.join(self.path, path))
159+
160+
def _kp_status(self, path, revision=None, detailed=False, branch=None):
161+
return self.PostStatus.PUBLISHED
162+
163+
def _kp_get_revision(self, path):
164+
# We use a 'REVISION' file in the knowledge post folder rather than using git
165+
# revisions because using git rev-parse is slow.
166+
try:
167+
return int(self._kp_read_ref(path, 'REVISION'))
168+
except:
169+
return 0
170+
171+
def _kp_get_revisions(self, path):
172+
raise NotImplementedError
173+
174+
def _kp_write_ref(self, path, reference, data, uuid=None, revision=None):
175+
path = os.path.join(self.path, path)
176+
if os.path.isfile(path):
177+
kp = KnowledgePost.from_file(path, format='kp')
178+
kp._write_ref(reference, data)
179+
kp.to_file(path, format='kp')
180+
else:
181+
ref_path = os.path.join(path, reference)
182+
ref_dir = os.path.dirname(ref_path)
183+
if not os.path.exists(ref_dir):
184+
os.makedirs(ref_dir)
185+
with open(ref_path, 'wb') as f:
186+
return f.write(data)
187+
188+
def _kp_dir(self, path, parent=None, revision=None): # TODO: Account for revision
189+
path = os.path.join(self.path, path)
190+
if os.path.isdir(path):
191+
if parent:
192+
path = os.path.join(path, parent)
193+
for dirpath, dirnames, filenames in os.walk(os.path.join(self.path, path)):
194+
for filename in filenames:
195+
if dirpath == "" and filename == "REVISION":
196+
continue
197+
yield os.path.relpath(os.path.join(dirpath, filename), os.path.join(self.path, path))
198+
else:
199+
kp = KnowledgePost.from_file(path, format='kp')
200+
for reference in kp._dir(parent=parent):
201+
yield reference
202+
203+
def _kp_has_ref(self, path, reference, revision=None): # TODO: Account for revision
204+
path = os.path.join(self.path, path)
205+
if os.path.isdir(path):
206+
return os.path.isfile(os.path.join(path, reference))
207+
else:
208+
kp = KnowledgePost.from_file(path, format='kp')
209+
return kp._has_ref(reference)
210+
211+
def _kp_diff(self, path, head, base):
212+
raise NotImplementedError
213+
214+
def _kp_new_revision(self, path, uuid=None):
215+
self._kp_write_ref(path, "REVISION", encode(self._kp_get_revision(path) + 1))
216+
if uuid:
217+
self._kp_write_ref(path, "UUID", encode(uuid))
218+
219+
def _kp_read_ref(self, path, reference, revision=None):
220+
path = os.path.join(self.path, path)
221+
if os.path.isdir(path):
222+
with open(os.path.join(self.path, path, reference), 'rb') as f:
223+
return f.read()
224+
else:
225+
kp = KnowledgePost.from_file(path, format='kp')
226+
return kp._read_ref(reference)

knowledge_repo/repositories/gitrepository.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222

2323
class GitKnowledgeRepository(KnowledgeRepository):
24-
_registry_keys = ['', 'git']
24+
_registry_keys = ['git']
2525

2626
TEMPLATES = {
2727
'README.md': os.path.abspath(os.path.join(os.path.dirname(__file__), '../templates', 'repository_readme.md')),

knowledge_repo/repository.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def for_uri(cls, uri, *args, **kwargs):
4343
if isinstance(uri, dict):
4444
return cls.for_uris(uri)
4545
scheme = urlparse(uri).scheme
46-
return cls._get_subclass_for(scheme)(uri, *args, **kwargs)
46+
return cls._get_subclass_for(scheme).from_uri(uri, *args, **kwargs)
4747

4848
@classmethod
4949
def for_uris(cls, uri):
@@ -57,6 +57,10 @@ def for_uris(cls, uri):
5757
krs = {name: cls.for_uri(uri) for name, uri in list(uris.items())}
5858
return MetaKnowledgeRepository(krs)
5959

60+
@classmethod
61+
def from_uri(cls, url, *args, **kwargs):
62+
return cls(url, *args, **kwargs)
63+
6064
@classmethod
6165
def create_for_uri(cls, uri, **kwargs):
6266
if isinstance(uri, dict):

run_tests.bat

+4-4
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ IF EXIST "%test_repo_path%" (
2424
RMDIR /Q /S %test_repo_path%
2525
)
2626

27-
%PYTHON%\\python.exe scripts/knowledge_repo --repo="${test_repo_path}" init
27+
%PYTHON%\\python.exe scripts/knowledge_repo --repo="git://${test_repo_path}" init
2828
COPY tests\config_repo.yml %test_repo_path%\.knowledge_repo_config.yml
2929

3030
PUSHD %test_repo_path%
@@ -35,9 +35,9 @@ PUSHD %test_repo_path%
3535
POPD
3636

3737
# Add some knowledge_posts
38-
%PYTHON%\\python.exe scripts/knowledge_repo --repo="%test_repo_path%" --dev add knowledge_repo/templates/knowledge_template.ipynb -p projects/test/ipynb_test -m "Test commit" --branch master
39-
%PYTHON%\\python.exe scripts/knowledge_repo --repo="%test_repo_path%" --dev add knowledge_repo/templates/knowledge_template.Rmd -p projects/test/Rmd_test -m "Test commit" --branch master
40-
%PYTHON%\\python.exe scripts/knowledge_repo --repo="%test_repo_path%" --dev add knowledge_repo/templates/knowledge_template.md -p projects/test/md_test -m "Test commit" --branch master
38+
%PYTHON%\\python.exe scripts/knowledge_repo --repo="git://%test_repo_path%" --dev add knowledge_repo/templates/knowledge_template.ipynb -p projects/test/ipynb_test -m "Test commit" --branch master
39+
%PYTHON%\\python.exe scripts/knowledge_repo --repo="git://%test_repo_path%" --dev add knowledge_repo/templates/knowledge_template.Rmd -p projects/test/Rmd_test -m "Test commit" --branch master
40+
%PYTHON%\\python.exe scripts/knowledge_repo --repo="git://%test_repo_path%" --dev add knowledge_repo/templates/knowledge_template.md -p projects/test/md_test -m "Test commit" --branch master
4141

4242
REM "Running regression test suite"
4343
%PYTHON%\\python.exe -m nose --with-coverage --cover-package=knowledge_repo --verbosity=1

run_tests.sh

+8-8
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ echo "Creating a test repository in ${test_repo_path}..."
2424
# Remove the repository if it exists
2525
rm -rf ${test_repo_path} &> /dev/null
2626

27-
`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" init
27+
`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" init
2828
mkdir -p ${test_repo_path} &> /dev/null
2929
cp `dirname $0`/tests/config_repo.yml ${test_repo_path}/.knowledge_repo_config.yml &> /dev/null
3030

@@ -36,27 +36,27 @@ git commit -m "Update repository config." &> /dev/null
3636
popd &> /dev/null
3737

3838
# Add some knowledge_posts
39-
`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" --dev add `dirname $0`/knowledge_repo/templates/knowledge_template.ipynb -p projects/test/ipynb_test -m "Test commit" --branch master
40-
`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" --dev add `dirname $0`/knowledge_repo/templates/knowledge_template.Rmd -p projects/test/Rmd_test -m "Test commit" --branch master
41-
`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" --dev add `dirname $0`/knowledge_repo/templates/knowledge_template.md -p projects/test/md_test -m "Test commit" --branch master
39+
`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" --dev add `dirname $0`/knowledge_repo/templates/knowledge_template.ipynb -p projects/test/ipynb_test -m "Test commit" --branch master
40+
`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" --dev add `dirname $0`/knowledge_repo/templates/knowledge_template.Rmd -p projects/test/Rmd_test -m "Test commit" --branch master
41+
`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" --dev add `dirname $0`/knowledge_repo/templates/knowledge_template.md -p projects/test/md_test -m "Test commit" --branch master
4242

4343
for post in $(ls `dirname $0`/tests/test_posts); do
4444
if [[ "${post}" == *.ipynb ]]; then
45-
`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" --dev add `dirname $0`/tests/test_posts/${post} -p projects/${post} -m "Test commit" --branch master;
45+
`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" --dev add `dirname $0`/tests/test_posts/${post} -p projects/${post} -m "Test commit" --branch master;
4646
fi;
4747
if [[ "${post}" == *.Rmd ]]; then
48-
`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" --dev add `dirname $0`/tests/test_posts/${post} -p projects/${post} -m "Test commit" --branch master;
48+
`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" --dev add `dirname $0`/tests/test_posts/${post} -p projects/${post} -m "Test commit" --branch master;
4949
fi;
5050
if [[ "${post}" == *.md ]]; then
51-
`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" --dev add `dirname $0`/tests/test_posts/${post} -p projects/${post} -m "Test commit" --branch master;
51+
`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" --dev add `dirname $0`/tests/test_posts/${post} -p projects/${post} -m "Test commit" --branch master;
5252
fi;
5353
done
5454

5555
echo
5656
echo "Synchronising database index"
5757
echo "-----------------------------"
5858
echo
59-
`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" reindex --config `dirname $0`/tests/config_server.py
59+
`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" reindex --config `dirname $0`/tests/config_server.py
6060

6161
echo
6262
echo "Running regression test suite"

0 commit comments

Comments
 (0)