Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hack in fragile GitLab support #879

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions nbviewer/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,10 @@ class NBViewer(Application):
default_value="nbviewer.providers.gist.handlers.UserGistsHandler",
help="The Tornado handler to use for viewing directory containing all of a user's Gists",
).tag(config=True)
gitlab_handler = Unicode(
default_value="nbviewer.providers.gitlab.handlers.GitlabHandler",
help="The Tornado handler to use for viewing notebooks in a GitLab instance"
).tag(config=True)

answer_yes = Bool(
default_value=False,
Expand Down Expand Up @@ -627,6 +631,7 @@ def init_tornado_application(self):
local_handler=self.local_handler,
url_handler=self.url_handler,
user_gists_handler=self.user_gists_handler,
gitlab_handler=self.gitlab_handler,
)
handler_kwargs = {
"handler_names": handler_names,
Expand Down
6 changes: 3 additions & 3 deletions nbviewer/providers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@
# the file COPYING, distributed as part of this software.
# -----------------------------------------------------------------------------


default_providers = [
"nbviewer.providers.{}".format(prov) for prov in ["url", "github", "gist"]
"nbviewer.providers.{}".format(prov) for prov in ["url", "github", "gist", "gitlab"]
]

default_rewrites = [
"nbviewer.providers.{}".format(prov)
for prov in ["gist", "github", "dropbox", "url"]
for prov in ["gitlab", "gist", "github", "dropbox", "url"]
]


def provider_handlers(providers, **handler_kwargs):
"""Load tornado URL handlers from an ordered list of dotted-notation modules
which contain a `default_handlers` function
Expand Down
1 change: 1 addition & 0 deletions nbviewer/providers/gitlab/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .handlers import default_handlers, uri_rewrites
125 changes: 125 additions & 0 deletions nbviewer/providers/gitlab/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#-----------------------------------------------------------------------------
# Copyright (C) 2020 The IPython Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
#-----------------------------------------------------------------------------

import json
import os
from urllib.parse import quote, quote_plus, urlencode

from tornado.httpclient import AsyncHTTPClient, HTTPClientError
from tornado.log import app_log

from ...utils import response_text


class GitlabClient(object):
    """Asynchronous client for a private GitLab instance using V4 REST API.

    Please see https://docs.gitlab.com/ee/api/ for details.
    """

    def __init__(self, host, token=None, client=None):
        """Init a GitlabClient.

        host: str
            Host name of the GitLab instance (no scheme).
        token: optional str
            Private access token - if not provided, uses the environment
            variable GITLAB_TOKEN. When neither is set, requests are sent
            without a ``private_token`` parameter.
        client: optional AsyncHTTPClient
            HTTP client used for requests; a new AsyncHTTPClient is created
            when omitted.
        """
        self.client = client or AsyncHTTPClient()
        self.host = host
        self.token = token or os.environ.get("GITLAB_TOKEN")

    @property
    def api_url(self):
        """The base URL of the REST API."""
        return "https://{host}/api/v4".format(host=self.host)

    @staticmethod
    def _encode_project_id(project_id):
        """Return ``project_id`` in a form usable as a single URL path segment.

        String ids (``group/repo`` paths) are percent-encoded, including the
        slash; any other id (typically an int) is returned unchanged.
        """
        if isinstance(project_id, str):
            # quote(), not quote_plus(): inside a URL path a "+" is a literal
            # plus sign, so spaces have to be encoded as %20.
            return quote(project_id, safe="")
        return project_id

    def _build_url(self, path, params=()):
        """Join the API base URL, ``path`` and the encoded query ``params``.

        path: str, already-encoded path starting with "/"
        params: iterable of (name, value) pairs; values are URL-encoded here

        The access token, when available, is sent as the first query
        parameter; without a token the parameter is omitted rather than
        sent as the string "None".
        """
        query = []
        if self.token:
            query.append(("private_token", self.token))
        query.extend(params)

        url = self.api_url + path
        if query:
            url = url + "?" + urlencode(query)
        return url

    async def _fetch_json(self, url):
        """Fetch the content at URL and decode it as JSON.

        Raises HTTPClientError (after logging it) when the request fails.
        """
        try:
            response = await self.client.fetch(url)
        except HTTPClientError as ex:
            # log and raise because this can get lost in async
            app_log.error(ex)
            raise
        return json.loads(response_text(response))

    async def projects(self, search=None):
        """List projects accessible on this GitLab instance.

        search: optional str
            Only return projects matching this search term.
        """
        params = [("simple", "true")]
        if search is not None:
            # The term is free text, so it must be URL-encoded.
            params.append(("search", search))

        return await self._fetch_json(self._build_url("/projects", params))

    async def tree(self, project_id, branch="master", path=None, recursive=False):
        """List all files in the given branch and project.

        project_id: int or str
        branch: optional str
        path: optional str (defaults to root)
        recursive: optional bool
        """
        tree_path = "/projects/{project_id}/repository/tree".format(
            project_id=self._encode_project_id(project_id)
        )
        params = [
            # GitLab documents booleans in lowercase ("true"/"false").
            ("recursive", str(recursive).lower()),
            ("ref", branch),
            # NOTE(review): GitLab documents a maximum page size of 100, so
            # larger listings are presumably truncated - confirm and paginate.
            ("per_page", 1000),
        ]
        if path is not None:
            params.append(("path", path))

        return await self._fetch_json(self._build_url(tree_path, params))

    async def fileinfo(self, project_id, filepath, branch="master"):
        """Information for file in given branch and project.

        project_id: int or str
        filepath: str
        branch: str
        """
        file_path = "/projects/{project_id}/repository/files/{filepath}".format(
            project_id=self._encode_project_id(project_id),
            # The whole file path is one path segment: encode "/" as well,
            # and spaces as %20 rather than "+".
            filepath=quote(filepath, safe=""),
        )
        return await self._fetch_json(
            self._build_url(file_path, [("ref", branch)])
        )

    def raw_file_url(self, project_id, blob_sha):
        """URL of the raw file matching given blob SHA in project.

        project_id: int or str
        blob_sha: str
        """
        raw_path = "/projects/{project_id}/repository/blobs/{blob_sha}/raw".format(
            project_id=self._encode_project_id(project_id),
            blob_sha=blob_sha,
        )
        return self._build_url(raw_path)
174 changes: 174 additions & 0 deletions nbviewer/providers/gitlab/handlers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
#-----------------------------------------------------------------------------
# Copyright (C) 2020 The IPython Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
#-----------------------------------------------------------------------------

import json
import os
from tornado import web
from tornado.httpclient import HTTPClientError
from tornado.log import app_log
from ..base import RenderingHandler, cached
from ...utils import response_text
from .. import _load_handler_from_location
from .client import GitlabClient


class GitlabHandler(RenderingHandler):
    """Render notebooks and directory listings hosted on a GitLab instance."""

    async def lookup_notebook(self, client, group, repo, branch, filepath):
        """Attempt to find the notebook by searching project trees.

        Used when an instance is misconfigured and paths are getting
        sanitised.

        Returns the raw-file URL of the blob at ``filepath``.
        Raises web.HTTPError(404) when the project or the blob is not found.
        """
        path_with_namespace = "{0}/{1}".format(group, repo)

        # The search is by repo name only, so filter for the exact namespace.
        projects = await client.projects(search=repo)
        project = next(
            (p for p in projects
             if p["path_with_namespace"] == path_with_namespace),
            None,
        )
        if project is None:
            raise web.HTTPError(
                404, "Project path not found: %s", path_with_namespace
            )

        tree = await client.tree(project["id"], branch, recursive=True)
        blob = next((item for item in tree if item["path"] == filepath), None)
        if blob is None:
            raise web.HTTPError(404, "Blob not found: %s", filepath)

        return client.raw_file_url(project["id"], blob["id"])

    async def get_notebook_data(self, client, group, repo, branch, filepath):
        """Return the raw-file URL of the notebook at ``filepath``.

        The file is first looked up directly; when GitLab answers 404 the
        project tree is searched instead (see ``lookup_notebook``).

        Raises web.HTTPError instead of returning None, so that a failed
        lookup is reported to the user rather than surfacing later as an
        attempt to fetch a missing URL.
        """
        path_with_namespace = "{group}/{repo}".format(group=group, repo=repo)

        try:
            fileinfo = await client.fileinfo(path_with_namespace, filepath, branch)
        except HTTPClientError as http_error:
            if http_error.code != 404:
                app_log.error(http_error)
                raise web.HTTPError(
                    502, "GitLab request failed: %s", http_error
                ) from http_error

            # Sometimes the url-encoded paths get sanitized, so give this a try
            app_log.warning(
                "Unable to access %s in %s directly, attempting lookup",
                filepath,
                path_with_namespace,
            )
            try:
                return await self.lookup_notebook(
                    client, group, repo, branch, filepath
                )
            except HTTPClientError as lookup_error:
                app_log.error(lookup_error)
                raise web.HTTPError(
                    404, "Unable to locate %s in %s", filepath, path_with_namespace
                ) from lookup_error

        return client.raw_file_url(path_with_namespace, fileinfo["blob_id"])

    async def deliver_notebook(self, host, group, repo, branch, path, remote_url):
        """Fetch the notebook at ``remote_url`` and render it.

        Raises web.HTTPError(400) when the notebook is not valid UTF-8.
        """
        response = await self.fetch(remote_url)

        base_url = ("/gitlab/{host}/{group}/{repo}/tree/{branch}/"
                    .format(host=host,
                            group=group,
                            repo=repo,
                            branch=branch))

        breadcrumbs = [{"url": base_url, "name": repo}]
        # rpartition gives "" for a notebook in the repository root, whereas
        # rsplit(...)[0] would return the file name itself as a directory.
        dirpath = path.rpartition('/')[0]
        breadcrumbs.extend(self.breadcrumbs(dirpath, base_url))

        try:
            nbjson = response_text(response, encoding='utf-8')
        except UnicodeDecodeError:
            app_log.error("Notebook is not utf8: %s", remote_url, exc_info=True)
            raise web.HTTPError(400)

        await self.finish_notebook(nbjson,
                                   download_url=remote_url,
                                   msg="file from url: " + remote_url,
                                   public=False,
                                   breadcrumbs=breadcrumbs,
                                   request=self.request)

    def render_dirview_template(self, entries, title, breadcrumbs):
        """Render the ``dirview.html`` template for a directory listing."""
        return self.render_template('dirview.html',
                                    entries=entries,
                                    breadcrumbs=breadcrumbs,
                                    title=title)

    async def show_dir(self, client, group, repo, branch, dirpath):
        """Render the listing of directory ``dirpath`` in the repository.

        Directories and notebooks link back into this viewer; any other file
        links to the GitLab instance itself.
        """
        path_with_namespace = "{group}/{repo}".format(group=group, repo=repo)
        tree = await client.tree(path_with_namespace, branch, dirpath)

        full_url = "/gitlab/{host}/{group}/{repo}/{path_type}/{branch}/{path}"
        external_url = "https://{host}/{group}/{repo}/{path_type}/{branch}/{path}"

        base_url = ("/gitlab/{host}/{group}/{repo}/tree/{branch}/"
                    .format(host=client.host,
                            group=group,
                            repo=repo,
                            branch=branch))

        breadcrumbs = [{"url": base_url, "name": repo}]
        breadcrumbs.extend(self.breadcrumbs(dirpath, base_url))

        entries = []
        for item in tree:
            url_parts = dict(host=client.host,
                             group=group,
                             repo=repo,
                             branch=branch,
                             path=item["path"])
            if item["type"] == "tree":
                entry_class = "fa fa-folder-open"
                # item["path"] is relative to the repository root, so a bare
                # relative link breaks once the current page is nested more
                # than one directory deep; build an absolute viewer URL.
                url = full_url.format(path_type="tree", **url_parts)
            elif item["type"] == "blob" and item["path"].endswith(".ipynb"):
                entry_class = "fa fa-book"
                url = full_url.format(path_type="blob", **url_parts)
            else:
                entry_class = "fa fa-share"
                url = external_url.format(path_type="blob", **url_parts)

            entries.append({"name": item["name"],
                            "url": url,
                            "class": entry_class})

        html = self.render_dirview_template(entries=entries,
                                            title=dirpath,
                                            breadcrumbs=breadcrumbs)
        await self.cache_and_finish(html)

    @cached
    async def get(self, host, group, repo, path_type, branch, path):
        """Serve a notebook (``blob``) or a directory listing (``tree``)."""
        client = GitlabClient(host)
        if path_type == "blob":
            raw_url = await self.get_notebook_data(client, group, repo, branch, path)
            await self.deliver_notebook(host, group, repo, branch, path, raw_url)
        else:
            await self.show_dir(client, group, repo, branch, path)

def uri_rewrites(rewrites=[]):
    """Return ``rewrites`` extended with the GitLab URI rewrite rules.

    Each rule is a ``(pattern, template)`` pair. The patterns only match
    hosts whose name starts with ``gitlab.``, given either as a plain URL or
    behind a ``/url/`` or ``/urls/`` prefix. The input list is not modified.
    """
    target = r'/gitlab/{0}'
    patterns = (
        r'^https?://(gitlab\..*)$',
        r'^/url[s]?/(gitlab\..*)$',
        r'^/url[s]?/https?://(gitlab\..*)$',
    )
    return rewrites + [(pattern, target) for pattern in patterns]

def default_handlers(handlers=[], **handler_names):
    """Return ``handlers`` extended with the GitLab URL route.

    handlers: list of Tornado URL specs to extend (not modified)
    handler_names: must contain ``gitlab_handler``, the location of the
        handler class, which is resolved with ``_load_handler_from_location``.

    The route captures ``host``, ``group``, ``repo``, ``path_type``
    (``blob`` or ``tree``), ``branch`` and ``path`` as named groups.
    """
    gitlab_handler = _load_handler_from_location(handler_names['gitlab_handler'])
    # Every fragment is a raw string: "\w" and "\-" in a plain string literal
    # are invalid escape sequences and trigger a warning on modern Python.
    return handlers + [
        (r'/gitlab/(?P<host>[\w_\-.]+)'
         r'/(?P<group>[\w_\-.]+)'
         r'/(?P<repo>[\w_\-]+)'
         r'/(?P<path_type>blob|tree)'
         r'/(?P<branch>[\w_\-()]+)'
         r'/(?P<path>.*)', gitlab_handler, {}),
    ]