diff --git a/.gitignore b/.gitignore index 0fc065b..5fd9ad5 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ build/ dist/ *.egg-info *_testing.py +.pytest_cache # pipenv .env diff --git a/notion/renderer.py b/notion/renderer.py index f6f5f53..8a20bca 100644 --- a/notion/renderer.py +++ b/notion/renderer.py @@ -1,8 +1,67 @@ -import markdown2 +import mistletoe +from mistletoe import block_token, span_token +from mistletoe.html_renderer import HTMLRenderer as MistletoeHTMLRenderer import requests +import dominate +from dominate.tags import * +from dominate.util import raw +from dominate.dom_tag import dom_tag +from more_itertools import flatten from .block import * +from .collection import Collection + +#This is the minimal css stylesheet to apply to get +#decent lookint output, it won't make it look exactly like Notion.so +#but will have the same basic structure +HTMLRendererStyles = """ + +""" +class MistletoeHTMLRendererSpanTokens(MistletoeHTMLRenderer): + """ + Renders Markdown to HTML without any MD block tokens (like blockquote or code) + except for the paragraph block token, because you need at least one + """ + + def __enter__(self): + ret = super().__enter__() + for tokenClsName in block_token.__all__[:-1]: #All but Paragraph token + block_token.remove_token(getattr(block_token, tokenClsName)) + span_token.remove_token(span_token.AutoLink) #don't autolink urls in markdown + return ret + # Auto resets tokens in __exit__, so no need to readd the tokens anywhere + + def render_paragraph(self, token): + """ + Only used for span tokens, so don't render out anything + """ + return self.render_inner(token) class BaseRenderer(object): @@ -10,232 +69,284 @@ def __init__(self, start_block): self.start_block = start_block def render(self): - return self.render_block(self.start_block) + pass + + def render_block(self, block): + pass + +def renderMD(mdStr): + """ + Render the markdown string to HTML, wrapped with dominate "raw" so Dominate + renders it straight to HTML. + """ + #[:-1] because it adds a newline for some reason + #TODO: Follow up on this and make it more robust + #https://github.com/miyuchina/mistletoe/blob/master/mistletoe/block_token.py#L138-L152 + return raw(mistletoe.markdown(mdStr, MistletoeHTMLRendererSpanTokens)[:-1]) + +def href_for_block(block): + """ + Gets the href for a given block + """ + return f'https://www.notion.so/{block.id.replace("-", "")}' + +def handles_children_rendering(func): + setattr(func, 'handles_children_rendering', True) + return func - def calculate_child_indent(self, block): - if block.type == "page": - return 0 - else: - return 1 +class BaseHTMLRenderer(BaseRenderer): + """ + BaseRenderer for HTML output, uses [Dominate](https://github.com/Knio/dominate) + internally for generating HTML output + Each token rendering method should create a dominate tag and it automatically + gets added to the parent context (because of the with statement). If you return + a given tag, it will be used as the parent container for all rendered children + """ + + def __init__(self, start_block, render_linked_pages=False, render_sub_pages=True, + render_table_pages_after_table=False, with_styles=False): + """ + start_block The root block to render from + follow_links Whether to follow "Links to pages" + """ + self.exclude_ids = [] #TODO: Add option for this + self.start_block = start_block + self.render_linked_pages = render_linked_pages + self.render_sub_pages = render_sub_pages + self.render_table_pages_after_table = render_table_pages_after_table + self.with_styles = with_styles + + self._render_stack = [] + + def render(self, **kwargs): + """ + Renders the HTML, kwargs takes kwargs for Dominate's render() function + https://github.com/Knio/dominate#rendering + + These can be: + `pretty` - Whether or not to be pretty + `indent` - Indent character to use + `xhtml` - Whether or not to use XHTML instead of HTML (
instead of
) + """ + els = self.render_block(self.start_block) + #Strings render as themselves, DOMinate tags user the passed kwargs + return (HTMLRendererStyles if self.with_styles else "") + \ + "".join(el.render(**kwargs) if isinstance(el, dom_tag) else el for el in els) + + def get_parent_el(self): + """ + Gets the current parent off the render stack + """ + if not self._render_stack: + return None + return self._render_stack[-1] + + def get_previous_sibling_el(self): + """ + Gets the previous sibling element in the rendered HTML tree + """ + parentEl = self.get_parent_el() + if not parentEl or not parentEl.children: + return None #No parent or no siblings + return parentEl.children[-1] + + def render_block(self, block): + if block.id in self.exclude_ids: + return [] #don't render this block - def render_block(self, block, level=0, preblock=None, postblock=None): assert isinstance(block, Block) - type_renderer = getattr(self, "handle_" + block._type, None) + type_renderer = getattr(self, "render_" + block._type, None) if not callable(type_renderer): - if hasattr(self, "handle_default"): - type_renderer = self.handle_default + if hasattr(self, "render_default"): + type_renderer = self.render_default else: raise Exception("No handler for block type '{}'.".format(block._type)) - pretext = type_renderer(block, level=level, preblock=preblock, postblock=postblock) - if isinstance(pretext, tuple): - pretext, posttext = pretext - else: - posttext = "" - return pretext + self.render_children(block, level=level+self.calculate_child_indent(block)) + posttext - - def render_children(self, block, level): - kids = block.children - if not kids: - return "" - text = "" - for i in range(len(kids)): - text += self.render_block(kids[i], level=level) - return text - - -bookmark_template = """ -
-
- -
-
-
{title}
-
{description}
-
- -
{link}/div> -
-
-
-
-
-
-
-
-
-
-
-""" - -callout_template = """ -
-
-
-
-
{icon}
-
-
-
-
{title}
-
-""" - -class BaseHTMLRenderer(BaseRenderer): - - def create_opening_tag(self, tagname, attributes={}): - attrs = "".join(' {}="{}"'.format(key, val) for key, val in attributes.items()) - return "<{tagname}{attrs}>".format(tagname=tagname, attrs=attrs) - - def wrap_in_tag(self, block, tagname, fieldname="title", attributes={}): - opentag = self.create_opening_tag(tagname, attributes) - innerhtml = markdown2.markdown(getattr(block, fieldname)) - return "{opentag}{innerhtml}".format(opentag=opentag, tagname=tagname, innerhtml=innerhtml) - - def left_margin_for_level(self, level): - return {"display": "margin-left: {}px;".format(level * 20)} - - def handle_default(self, block, level=0, preblock=None, postblock=None): - return self.wrap_in_tag(block, "p", attributes=self.left_margin_for_level(level)) - - def handle_divider(self, block, level=0, preblock=None, postblock=None): - return "
" - - def handle_column_list(self, block, level=0, preblock=None, postblock=None): - return '
', '
' - - def handle_column(self, block, level=0, preblock=None, postblock=None): - buffer = (len(block.parent.children) - 1) * 46 - width = block.get("format.column_ratio") - return '
'.format(buffer, width), '
' - - def handle_to_do(self, block, level=0, preblock=None, postblock=None): - return '
'.format( - id="chk_" + block.id, - checked=" checked" if block.checked else "", - title=block.title, - ) - - def handle_code(self, block, level=0, preblock=None, postblock=None): - return self.wrap_in_tag(block, "code", attributes=self.left_margin_for_level(level)) - - def handle_factory(self, block, level=0, preblock=None, postblock=None): - return "" - - def handle_header(self, block, level=0, preblock=None, postblock=None): - return self.wrap_in_tag(block, "h2", attributes=self.left_margin_for_level(level)) - - def handle_sub_header(self, block, level=0, preblock=None, postblock=None): - return self.wrap_in_tag(block, "h3", attributes=self.left_margin_for_level(level)) - - def handle_sub_sub_header(self, block, level=0, preblock=None, postblock=None): - return self.wrap_in_tag(block, "h4", attributes=self.left_margin_for_level(level)) - - def handle_page(self, block, level=0, preblock=None, postblock=None): - return self.wrap_in_tag(block, "h1", attributes=self.left_margin_for_level(level)) - - def handle_bulleted_list(self, block, level=0, preblock=None, postblock=None): - text = "" - if preblock is None or preblock.type != "bulleted_list": - text = self.create_opening_tag("ul", attributes=self.left_margin_for_level(level)) - text += self.wrap_in_tag(block, "li") - if postblock is None or postblock.type != "bulleted_list": - text += "" - return text - - def handle_numbered_list(self, block, level=0, preblock=None, postblock=None): - text = "" - if preblock is None or preblock.type != "numbered_list": - text = self.create_opening_tag("ol", attributes=self.left_margin_for_level(level)) - text += self.wrap_in_tag(block, "li") - if postblock is None or postblock.type != "numbered_list": - text += "" - return text - - def handle_toggle(self, block, level=0, preblock=None, postblock=None): - innerhtml = markdown2.markdown(block.title) - opentag = self.create_opening_tag("details", attributes=self.left_margin_for_level(level)) - return '{opentag}{innerhtml}'.format(opentag=opentag, innerhtml=innerhtml), '' - - def handle_quote(self, block, level=0, preblock=None, postblock=None): - return self.wrap_in_tag(block, "blockquote", attributes=self.left_margin_for_level(level)) - - def handle_text(self, block, level=0, preblock=None, postblock=None): - return self.handle_default(block=block, level=level, preblock=preblock, postblock=postblock) - - def handle_equation(self, block, level=0, preblock=None, postblock=None): - text = self.create_opening_tag("p", attributes=self.left_margin_for_level(level)) - return text + '

'.format(block.latex) - - def handle_embed(self, block, level=0, preblock=None, postblock=None): - iframetag = self.create_opening_tag("iframe", attributes={ - "src": block.display_source or block.source, - "frameborder": 0, - "sandbox": "allow-scripts allow-popups allow-forms allow-same-origin", - "allowfullscreen": "", - "style": "width: {width}px; height: {height}px; border-radius: 1px;".format(width=block.width, height=block.height), - }) - return '
' + iframetag + "
" - - def handle_video(self, block, level=0, preblock=None, postblock=None): - return self.handle_embed(block=block, level=level, preblock=preblock, postblock=postblock) - - def handle_file(self, block, level=0, preblock=None, postblock=None): - return self.handle_embed(block=block, level=level, preblock=preblock, postblock=postblock) - - def handle_audio(self, block, level=0, preblock=None, postblock=None): - return self.handle_embed(block=block, level=level, preblock=preblock, postblock=postblock) - - def handle_pdf(self, block, level=0, preblock=None, postblock=None): - return self.handle_embed(block=block, level=level, preblock=preblock, postblock=postblock) - - def handle_image(self, block, level=0, preblock=None, postblock=None): - return self.handle_embed(block=block, level=level, preblock=preblock, postblock=postblock) - - def handle_bookmark(self, block, level=0, preblock=None, postblock=None): - return bookmark_template.format(link=block.link, title=block.title, description=block.description, icon=block.bookmark_icon, cover=block.bookmark_cover) - - def handle_link_to_collection(self, block, level=0, preblock=None, postblock=None): - return self.wrap_in_tag(block, "p", attributes={"href": "https://www.notion.so/" + block.id.replace("-", "")}) - - def handle_breadcrumb(self, block, level=0, preblock=None, postblock=None): - return self.wrap_in_tag(block, "p", attributes=self.left_margin_for_level(level)) - - def handle_collection_view(self, block, level=0, preblock=None, postblock=None): - return self.wrap_in_tag(block, "p", attributes={"href": "https://www.notion.so/" + block.id.replace("-", "")}) - - def handle_collection_view_page(self, block, level=0, preblock=None, postblock=None): - return self.wrap_in_tag(block, "p", attributes={"href": "https://www.notion.so/" + block.id.replace("-", "")}) - - def handle_framer(self, block, level=0, preblock=None, postblock=None): - return self.handle_embed(block=block, level=level, preblock=preblock, postblock=postblock) - - def handle_tweet(self, block, level=0, preblock=None, postblock=None): + class_function = getattr(self.__class__, type_renderer.__name__) + + #Render ourselves to a Dominate HTML element + els = type_renderer(block) #Returns a list of elements + + #If the block has no children, or the called function handles the child + #rendering itself, don't render the children + if not block.children or hasattr(class_function, 'handles_children_rendering'): + return els + + #Otherwise, render and use the default append as a children-list + return els + self.render_blocks_into(block.children, None) + + def render_blocks_into(self, blocks, containerEl=None): + if containerEl is None: #Default behavior is to add a container for the children + containerEl = div(_class='children-list') + self._render_stack.append(containerEl) + for block in blocks: + els = self.render_block(block) + containerEl.add(els) + self._render_stack.pop() + return [containerEl] + + # == Conversions for rendering notion-py block types to elemenets == + # Each function should return a list containing dominate tags or a string of HTML + # Marking a function with handles_children_rendering means it handles rendering + # it's own `.children` and doesn't need to perform the default rendering + + def render_default(self, block): + return [p(renderMD(block.title))] + + def render_divider(self, block): + return [hr()] + + @handles_children_rendering + def render_column_list(self, block): + return self.render_blocks_into(block.children, div(style='display: flex;', _class='column-list')) + + @handles_children_rendering + def render_column(self, block): + return self.render_blocks_into(block.children, div(_class='column')) + + def render_to_do(self, block): + id = f'chk_{block.id}' + return [input( \ + label(_for=id), \ + type='checkbox', id=id, checked=block.checked, title=block.title)] + + def render_code(self, block): + #TODO: Do we want this to support Markdown? I think there's a notion-py + #change that might affect this... (the unstyled-title or whatever) + return [pre(code(block.title))] + + def render_factory(self, block): + return [] + + def render_header(self, block): + return [h2(renderMD(block.title))] + + def render_sub_header(self, block): + return [h3(renderMD(block.title))] + + def render_sub_sub_header(self, block): + return [h4(renderMD(block.title))] + + @handles_children_rendering + def render_page(self, block): + #TODO: I would use isinstance(xxx, CollectionRowBlock) here but it's buggy + #https://github.com/jamalex/notion-py/issues/103 + if isinstance(block.parent, Collection): #If it's a child of a collection (CollectionRowBlock) + if not self.render_table_pages_after_table: + return [] + return [h3(renderMD(block.title))] + self.render_blocks_into(block.children) + elif block.parent.id != block.get()['parent_id']: + #A link is a PageBlock where the parent id doesn't equal the _actual_ parent id + #of the block + if not self.render_linked_pages: + #Render only the link, none of the content in the link + return [a(h4(renderMD(block.title)), href=href_for_block(block))] + else: #A normal PageBlock + if not self.render_sub_pages and self._render_stack: + return [h4(renderMD(block.title))] #Subpages when not rendering them render like in Notion, as a simple heading + + #Otherwise, render a page normally in it's entirety + #TODO: This should probably not use a "children-list" but we need to refactor + #the _render_stack to make that work... + return [h1(renderMD(block.title))] + self.render_blocks_into(block.children) + + @handles_children_rendering + def render_bulleted_list(self, block): + previousSibling = self.get_previous_sibling_el() + previousSiblingIsUl = previousSibling and isinstance(previousSibling, ul) + containerEl = previousSibling if previousSiblingIsUl else ul() #Make a new ul if there's no previous ul + + blockEl = li(renderMD(block.title)) + containerEl.add(blockEl) #Render out ourself into the stack + self.render_blocks_into(block.children, containerEl) + return [] if containerEl.parent else [containerEl] #Only return if it's not in the rendered output yet + + @handles_children_rendering + def render_numbered_list(self, block): + previousSibling = self.get_previous_sibling_el() + previousSiblingIsOl = previousSibling and isinstance(previousSibling, ol) + containerEl = previousSibling if previousSiblingIsOl else ol() #Make a new ol if there's no previous ol + + blockEl = li(renderMD(block.title)) + containerEl.add(blockEl) #Render out ourself into the stack + self.render_blocks_into(block.children, containerEl) + return [] if containerEl.parent else [containerEl] #Only return if it's not in the rendered output yet + + def render_toggle(self, block): + return [details(summary(renderMD(block.title)))] + + def render_quote(self, block): + return [blockquote(renderMD(block.title))] + + render_text = render_default + + def render_equation(self, block): + return [p(img(src=f'https://chart.googleapis.com/chart?cht=tx&chl={block.latex}'))] + + def render_embed(self, block): + return [iframe(src=block.display_source or block.source, frameborder=0, + sandbox='allow-scripts allow-popups allow-forms allow-same-origin', + allowfullscreen='')] + + def render_video(self, block): + #TODO, this won't work if there's no file extension, we might have + #to query and get the MIME type... + src = block.display_source or block.source + srcType = src.split('.')[-1] + return [video(source(src=src, type=f"video/{srcType}"), controls=True)] + + render_file = render_embed + render_pdf = render_embed + + def render_audio(self, block): + return [audio(src=block.display_source or block.source, controls=True)] + + def render_image(self, block): + attrs = {} + if block.caption: # Add the alt attribute if there's a caption + attrs['alt'] = block.caption + return [img(src=block.display_source or block.source, **attrs)] + + def render_bookmark(self, block): + #return bookmark_template.format(link=, title=block.title, description=block.description, icon=block.bookmark_icon, cover=block.bookmark_cover) + #TODO: It's just a social share card for the website we're bookmarking + return [a(href="block.link")] + + def render_link_to_collection(self, block): + return [a(renderMD(block.title), href=href_for_block(block))] + + def render_breadcrumb(self, block): + return [p(renderMD(block.title))] + + def render_collection_view_page(self, block): + print("TEST") + return [a(renderMD(block.title), href=href_for_block(block))] + + render_framer = render_embed + + def render_tweet(self, block): + #TODO: Convert to a list or something return requests.get("https://publish.twitter.com/oembed?url=" + block.source).json()["html"] - def handle_gist(self, block, level=0, preblock=None, postblock=None): - return self.handle_embed(block=block, level=level, preblock=preblock, postblock=postblock) - - def handle_drive(self, block, level=0, preblock=None, postblock=None): - return self.handle_embed(block=block, level=level, preblock=preblock, postblock=postblock) - - def handle_figma(self, block, level=0, preblock=None, postblock=None): - return self.handle_embed(block=block, level=level, preblock=preblock, postblock=postblock) - - def handle_loom(self, block, level=0, preblock=None, postblock=None): - return self.handle_embed(block=block, level=level, preblock=preblock, postblock=postblock) - - def handle_typeform(self, block, level=0, preblock=None, postblock=None): - return self.handle_embed(block=block, level=level, preblock=preblock, postblock=postblock) - - def handle_codepen(self, block, level=0, preblock=None, postblock=None): - return self.handle_embed(block=block, level=level, preblock=preblock, postblock=postblock) - - def handle_maps(self, block, level=0, preblock=None, postblock=None): - return self.handle_embed(block=block, level=level, preblock=preblock, postblock=postblock) - - def handle_invision(self, block, level=0, preblock=None, postblock=None): - return self.handle_embed(block=block, level=level, preblock=preblock, postblock=postblock) - - def handle_callout(self, block, level=0, preblock=None, postblock=None): - return callout_template.format(icon=block.icon, title=markdown2.markdown(block.title)) - + render_gist = render_embed + render_drive = render_embed + render_figma = render_embed + render_loom = render_embed + render_typeform = render_embed + render_codepen = render_embed + render_maps = render_embed + render_invision = render_embed + + def render_callout(self, block): + return [div( \ + div(block.icon, _class="icon") + div(renderMD(block.title), _class="text"), \ + _class="callout")] + + def render_collection_view(self, block): + #Render out the table itself + #TODO + + #Render out all the embedded PageBlocks + if not self.render_table_pages_after_table: + return [] #Don't render out any of the internal pages + + return [h2(block.title)] + self.render_blocks_into(block.collection.get_rows()) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index c25cb42..3a5b1de 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,5 @@ tzlocal python-slugify dictdiffer cached-property -markdown2 \ No newline at end of file +markdown2 +mistletoe \ No newline at end of file diff --git a/tests/test_renderer.py b/tests/test_renderer.py new file mode 100644 index 0000000..a2e64ae --- /dev/null +++ b/tests/test_renderer.py @@ -0,0 +1,143 @@ +''' +Tests for notion-py renderer +''' +import uuid +from functools import partial +import pytest +from notion.renderer import BaseHTMLRenderer +from notion.block import TextBlock, BulletedListBlock, PageBlock, NumberedListBlock, \ + ImageBlock, ColumnBlock, ColumnListBlock +from unittest.mock import Mock, PropertyMock + +def MockSpace(pages=[]): + #TODO: Doesn't operate at all like *Block types... + spaceMock = Mock() + spaceMock.pages = pages + spaceMock.id = uuid.uuid4() + for page in pages: + type(page).parent = PropertyMock(return_value = spaceMock) + return spaceMock +testSpace = MockSpace() + +def BlockMock(blockType, inputDict, children=[]): + global testSpace + + blockMock = Mock(spec=blockType) + blockMock._type = blockType._type + blockMock.__dict__.update(inputDict) + blockMock.id = uuid.uuid4() + blockMock.get = Mock(return_value={}) + blockMock.children = children + if issubclass(blockType, PageBlock): + #PageBlocks always need a parent, might be overwritten later + type(blockMock).parent = PropertyMock(return_value = testSpace) + blockMock.get = Mock(return_value={ + 'parent_id': testSpace.id + }) + + #Setup children references if passed + for child in children: + #Can't set a mock on a property of a mock in a circular relationship + #or it messes up so use PropertyMock + type(child).parent = PropertyMock(return_value = blockMock) + child.get = Mock(return_value = { + 'parent_id': blockMock.id + }) + return blockMock + +for blockCls in [TextBlock, BulletedListBlock, PageBlock, NumberedListBlock, \ + ImageBlock, ColumnBlock, ColumnListBlock]: + globals()["Mock" + blockCls.__name__] = partial(BlockMock, blockCls) + +def test_TextBlock(): + '''it renders a TextBlock''' + #arrange + block = MockTextBlock({ 'title': 'Hold up, lemme test this block...' }) + + #act + output = BaseHTMLRenderer(block).render(pretty=False) + + #assert + assert output == '

Hold up, lemme test this block...

' + +def test_BulletedListBlock(): + '''it renders BulletedListBlocks''' + #arrange + block = MockPageBlock({ 'title': 'Test Page' }, [ + MockBulletedListBlock({ 'title': ':3' }), + MockBulletedListBlock({ 'title': ':F' }), + MockBulletedListBlock({ 'title': '>:D'}) + ]) + + #act + output = BaseHTMLRenderer(block).render(pretty=False) + + #assert + assert output == '

Test Page

' + +def test_BulletedListBlockNested(): + '''it renders BulletedListBlocks''' + #arrange + block = MockPageBlock({ 'title': 'Test Page' }, [ + MockBulletedListBlock({ 'title': 'owo' }, [ + MockBulletedListBlock({ 'title': 'OwO' }) + ]) + ]) + + #act + output = BaseHTMLRenderer(block).render(pretty=False) + + #assert + assert output == '

Test Page

' + +def test_NumberedListBlock(): + '''it renders NumberedListBlocks''' + #arrange + block = MockPageBlock({ 'title': 'Test Page' }, [ + MockNumberedListBlock({ 'title': ':3' }), + MockNumberedListBlock({ 'title': ':F' }), + MockNumberedListBlock({ 'title': '>:D'}) + ]) + + #act + output = BaseHTMLRenderer(block).render(pretty=False) + + #assert + assert output == '

Test Page

  1. :3
  2. :F
  3. >:D
' + +def test_ImageBlock(): + '''it renders an ImageBlock''' + #arrange + block = MockImageBlock({ + 'caption': 'Its a me! Placeholderio', + 'display_source': 'https://via.placeholder.com/20x20', + 'source': 'https://via.placeholder.com/20x20' + }) + + #act + output = BaseHTMLRenderer(block).render(pretty=False) + + #assert + assert output == 'Its a me! Placeholderio' + +def test_ColumnList(): + '''it renders a ColumnList''' + #arrange + block = MockColumnListBlock({},[ + MockColumnBlock({},[ + MockTextBlock({ 'title': 'Whats wrong Jimmykun?' }) + ]), + MockColumnBlock({},[ + MockTextBlock({ 'title': 'Could it be that youre' }), + MockTextBlock({ 'title': 'craving my, c r o i s s a n t?' }), + ]) + ]) + + #act + output = BaseHTMLRenderer(block).render(pretty=False) + + #assert + assert output == '
' + \ + '

Whats wrong Jimmykun?

' + \ + '

Could it be that youre

craving my, c r o i s s a n t?

' \ + '
' \ No newline at end of file