Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions lib/eml/compiler.ex
Original file line number Diff line number Diff line change
Expand Up @@ -205,10 +205,10 @@ defmodule Eml.Compiler do
try do
{ :safe, concat(buffer, "", opts) }
catch
:throw, { :illegal_quoted, stacktrace } ->
:throw, :illegal_quoted ->
reraise Eml.CompileError,
[message: "It's only possible to pass assigns to templates or components when using &"],
stacktrace
__STACKTRACE__
end
end

Expand All @@ -232,7 +232,7 @@ defmodule Eml.Compiler do
{ :safe, chunk } ->
acc <> chunk
_ ->
throw { :illegal_quoted, System.stacktrace() }
throw :illegal_quoted
end
end

Expand Down
4 changes: 2 additions & 2 deletions lib/eml/element/generator.ex
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,9 @@ defmodule Eml.Element.Generator do
{ attrs, content }
end
case { content_or_attrs, maybe_content } do
{ [{ :do, {:"__block__", _, content}}], _ } -> init.(nil, content, in_match)
{ [{ :do, {:__block__, _, content}}], _ } -> init.(nil, content, in_match)
{ [{ :do, content}], _ } -> init.(nil, List.wrap(content), in_match)
{ attrs, [{ :do, {:"__block__", _, content}}] } -> init.(attrs, content, in_match)
{ attrs, [{ :do, {:__block__, _, content}}] } -> init.(attrs, content, in_match)
{ attrs, [{ :do, content}] } -> init.(attrs, List.wrap(content), in_match)
{ [{ _, _ } | _] = attrs, nil } -> init.(attrs, nil, in_match)
{ attrs, nil } when in_match -> init.(attrs, nil, in_match)
Expand Down
12 changes: 12 additions & 0 deletions lib/eml/encoder.ex
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,18 @@ defimpl Eml.Encoder, for: Tuple do
raise Protocol.UndefinedError, protocol: Eml.Encoder, value: { :safe, data }
end
end

# For our use case we allow CDATA to be written back into the generated HTML:
# Eml is only used for parsing our own generated code, which must already be
# safe by other means.
#
# A binary payload is passed through as `{ :safe, data }`. Any other payload
# raises `Protocol.UndefinedError`, reporting the offending `{ :cdata, data }`
# tuple (not a `:safe` tuple) so the error message names the real value.
def encode({ :cdata, data }) when is_binary(data), do: { :safe, data }
def encode({ :cdata, data }) do
  raise Protocol.UndefinedError, protocol: Eml.Encoder, value: { :cdata, data }
end

def encode(data) do
if Macro.validate(data) == :ok do
data
Expand Down
80 changes: 80 additions & 0 deletions lib/eml/html/ldc_special_parser.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
defmodule Eml.HTML.LDCSpecialParser do
  @moduledoc """
  Post-processing pass over parsed HTML that removes whitespace-only text
  nodes from element content.

  Text that sits directly between two elements whose tag names both start
  with `"ldc"` is always kept untouched. Whitespace-only text anywhere else in
  an element's content list is removed. An element whose content is a single
  whitespace-only binary gets `""` as content.
  """

  @doc """
  Applies the whitespace filter to every node of `parsed_html`.

  Each node is expected to be a binary, an `%Eml.Element{}` or a
  `{:cdata, data}` tuple. Binaries and CDATA tuples are returned unchanged;
  elements are returned with their content filtered recursively. The top-level
  list itself is not filtered, only the content of the elements in it.
  """
  def filter_empty_content(parsed_html) do
    Enum.map(parsed_html, &filter_element/1)
  end

  defp filter_element(element) when is_binary(element), do: element
  # CDATA needs special handling, it is passed through untouched.
  defp filter_element({:cdata, _} = element), do: element

  defp filter_element(%Eml.Element{content: content} = element) do
    %{element | content: filter_content(content)}
  end

  defp filter_content(nil), do: nil
  defp filter_content(content) when is_binary(content), do: maybe_trim_whitespace_content(content)

  defp filter_content(content) when is_list(content) do
    case Enum.map(content, &filter_element/1) do
      [] ->
        []

      [first_el | rest_content] ->
        # The walk uses a sliding window of three nodes (prev, this, next) and
        # starts with two `nil` placeholders in front of the first real node.
        # The accumulator is built in reverse order.
        Enum.reverse(filter_empty_content(nil, nil, first_el, rest_content, []))
    end
  end

  # Window shape "element, text, element": the only place where the text node
  # is judged against both of its neighbours.
  defp filter_empty_content(
         %Eml.Element{} = prev_el,
         this_el,
         %Eml.Element{} = next_el,
         more_content,
         acc
       )
       when is_binary(this_el) do
    cond do
      ldc_tag?(prev_el) and ldc_tag?(next_el) ->
        # Text between two ldc tags is kept verbatim.
        acc = [this_el, prev_el | acc]

        case more_content do
          [] ->
            [next_el | acc]

          [more_el | rest_content] ->
            # ensure that this_el is not trimmed in another call to
            # filter_empty_content, so just skip it
            filter_empty_content(nil, next_el, more_el, rest_content, acc)
        end

      String.trim(this_el) === "" ->
        # Whitespace-only text between other tags is dropped; prev_el stays in
        # the window so it can still be emitted later.
        case more_content do
          [] ->
            [next_el, prev_el | acc]

          [more_el | rest_content] ->
            filter_empty_content(prev_el, next_el, more_el, rest_content, acc)
        end

      true ->
        case more_content do
          [] ->
            [next_el, this_el, prev_el | acc]

          [more_el | rest_content] ->
            filter_empty_content(this_el, next_el, more_el, rest_content, [prev_el | acc])
        end
    end
  end

  # End of the content list: flush the whole window.
  defp filter_empty_content(prev_el, this_el, next_el, [], acc) do
    acc
    |> push_unless_blank(prev_el)
    |> push_unless_blank(this_el)
    |> push_unless_blank(next_el)
  end

  # Any other window shape: emit prev_el and slide the window one node on.
  defp filter_empty_content(prev_el, this_el, next_el, [more_el | rest_content], acc) do
    filter_empty_content(this_el, next_el, more_el, rest_content, push_unless_blank(acc, prev_el))
  end

  # Prepends `el` to `acc` unless it is a `nil` window placeholder or
  # whitespace-only text. Both the flushing and the sliding clause go through
  # this helper, so blank text is dropped consistently instead of being left
  # behind as `""` in longer content lists.
  defp push_unless_blank(acc, nil), do: acc

  defp push_unless_blank(acc, el) do
    case maybe_trim_whitespace_content(el) do
      "" -> acc
      kept -> [kept | acc]
    end
  end

  defp ldc_tag?(%Eml.Element{tag: tag}), do: String.starts_with?(to_string(tag), "ldc")

  # Collapses whitespace-only binaries to ""; everything else is returned as-is.
  defp maybe_trim_whitespace_content(content) when is_binary(content) do
    if String.trim(content) === "", do: "", else: content
  end

  defp maybe_trim_whitespace_content(element), do: element
end
93 changes: 54 additions & 39 deletions lib/eml/html/parser.ex
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,14 @@ defmodule Eml.HTML.Parser do

@spec parse(binary, Keyword.t) :: [Eml.t]
def parse(html, opts \\ []) do
res = tokenize(html, { :blank, [] }, [], :blank, opts) |> parse_content()
res = tokenize(html, { :blank, [] }, [], :blank, opts) |> parse_content(opts)

case res do
{ content, [] } ->
content
app_default = Application.get_env(:eml, :remove_empty_content_between_non_ldc_tags, true)
remove_whitespace = Keyword.get(opts, :remove_empty_content_between_non_ldc_tags, app_default)

if remove_whitespace, do: Eml.HTML.LDCSpecialParser.filter_empty_content(content), else: content
{ content, rest }->
raise Eml.ParseError, message: "Unparsable content, parsed: #{inspect content}, rest: #{inspect rest}"
end
Expand Down Expand Up @@ -60,10 +64,10 @@ defmodule Eml.HTML.Parser do
n = byte_size(end_token)
case chars do
<<^end_token::binary-size(n), rest::binary>> ->
acc = change(buf, acc, :cdata)
acc = change({ :open, "<" }, acc)
acc = change({ :slash, "/" }, acc)
acc = change({ :end_tag, end_tag }, acc)
acc = change(buf, acc, opts, :cdata)
acc = change({ :open, "<" }, acc, opts)
acc = change({ :slash, "/" }, acc, opts)
acc = change({ :end_tag, end_tag }, acc, opts)
tokenize(rest, { :end_close, ">" }, acc, :end_close, opts)
<<char>> <> rest ->
consume(char, rest, buf, acc, state, opts)
Expand Down Expand Up @@ -157,7 +161,7 @@ defmodule Eml.HTML.Parser do
:content ->
next(rest, buf, ">", acc, :start_close, opts)
{ :cdata, tag } ->
acc = change(buf, acc)
acc = change(buf, acc, opts)
next(rest, { :start_close, ">" }, "", acc, { :cdata, tag }, opts)
end
end
Expand Down Expand Up @@ -220,28 +224,28 @@ defmodule Eml.HTML.Parser do

# Add the old buffer to the accumulator and start a new buffer
defp next(rest, old_buf, new_buf, acc, new_state, opts) do
acc = change(old_buf, acc)
acc = change(old_buf, acc, opts)
new_buf = if is_integer(new_buf), do: <<new_buf>>, else: new_buf
tokenize(rest, { new_state, new_buf }, acc, new_state, opts)
end

# Add buffer to the accumulator if its content is not empty.
defp change({ type, buf }, acc, type_modifier \\ nil) do
defp change({ type, buf }, acc, opts, type_modifier \\ nil) when is_list(opts) do
type = if is_nil(type_modifier), do: type, else: type_modifier
token = { type, buf }
if empty?(token) do
if empty?(token, opts) do
acc
else
[token | acc]
end
end

# Checks for empty content
defp empty?({ :blank, _ }), do: true
defp empty?({ :content, content }) do
String.trim(content) === ""
defp empty?({ :blank, _ }, _opts), do: true
defp empty?({ :content, content }, opts) do
if get_trim_whitespace_opt(opts), do: String.trim(content) === "", else: false
end
defp empty?(_), do: false
defp empty?(_, _opts), do: false

# Checks if last tokenized tag is a tag that should always close.
defp get_last_tag(tokens, { type, buf }) do
Expand Down Expand Up @@ -285,59 +289,59 @@ defmodule Eml.HTML.Parser do

# Parse the generated tokens

defp parse_content(tokens) do
parse_content(tokens, [])
defp parse_content(tokens, opts) do
parse_content(tokens, [], opts)
end

defp parse_content([{ type, token } | ts], acc) do
defp parse_content([{ type, token } | ts], acc, opts) do
case preparse(type, token) do
:skip ->
parse_content(ts, acc)
parse_content(ts, acc, opts)
{ :tag, tag } ->
{ element, tokens } = parse_element(ts, [tag: tag, attrs: [], content: []])
parse_content(tokens, [element | acc])
{ element, tokens } = parse_element(ts, [tag: tag, attrs: [], content: []], opts)
parse_content(tokens, [element | acc], opts)
{ :content, content } ->
parse_content(ts, [content | acc])
parse_content(ts, [content | acc], opts)
{ :cdata, content } ->
# tag cdata in order to skip whitespace trimming
parse_content(ts, [{ :cdata, content } | acc])
parse_content(ts, [{ :cdata, content } | acc], opts)
:end_el ->
{ :lists.reverse(acc), ts }
end
end
defp parse_content([], acc) do
defp parse_content([], acc, _opts) do
{ :lists.reverse(acc), [] }
end

defp parse_element([{ type, token } | ts], acc) do
defp parse_element([{ type, token } | ts], acc, opts) do
case preparse(type, token) do
:skip ->
parse_element(ts, acc)
parse_element(ts, acc, opts)
{ :attr_field, field } ->
attrs = [{ field, "" } | acc[:attrs]]
parse_element(ts, Keyword.put(acc, :attrs, attrs))
parse_element(ts, Keyword.put(acc, :attrs, attrs), opts)
{ :attr_value, value } ->
[{ field, current } | rest] = acc[:attrs]
attrs = if is_binary(current) && is_binary(value) do
[{ field, current <> value } | rest]
else
[{ field, List.wrap(current) ++ [value] } | rest]
end
parse_element(ts, Keyword.put(acc, :attrs, attrs))
parse_element(ts, Keyword.put(acc, :attrs, attrs), opts)
:start_content ->
{ content, tokens } = parse_content(ts, [])
{ make_element(Keyword.put(acc, :content, content)), tokens }
{ content, tokens } = parse_content(ts, [], opts)
{ make_element(Keyword.put(acc, :content, content), opts), tokens }
:end_el ->
{ make_element(acc), ts }
{ make_element(acc, opts), ts }
end
end
defp parse_element([], acc) do
{ make_element(acc), [] }
defp parse_element([], acc, opts) do
{ make_element(acc, opts), [] }
end

defp make_element(acc) do
defp make_element(acc, opts) do
attrs = acc[:attrs]
%Eml.Element{tag: acc[:tag], attrs: Enum.into(attrs, %{}), content: finalize_content(acc[:content], acc[:tag])}
%Eml.Element{tag: acc[:tag], attrs: Enum.into(attrs, %{}), content: finalize_content(acc[:content], acc[:tag], opts)}
end

defp preparse(:blank, _), do: :skip
Expand All @@ -363,7 +367,7 @@ defmodule Eml.HTML.Parser do

defp preparse(:cdata, token), do: { :cdata, token }

defp finalize_content(content, tag)
defp finalize_content(content, tag, _opts)
when tag in [:textarea, :pre] do
case content do
[content] when is_binary(content) ->
Expand All @@ -374,15 +378,21 @@ defmodule Eml.HTML.Parser do
content
end
end
defp finalize_content(content, _) do
defp finalize_content(content, _, opts) do
trim_whitespace = get_trim_whitespace_opt(opts)

case content do
[content] when is_binary(content) ->
trim_whitespace(content, :only)
if trim_whitespace, do: trim_whitespace(content, :only), else: content
[] ->
nil
[first | rest] ->
first = trim_whitespace(first, :first)
[first | trim_whitespace_loop(rest, [])]
if trim_whitespace do
first = trim_whitespace(first, :first)
[first | trim_whitespace_loop(rest, [])]
else
[first | rest]
end
end
end

Expand Down Expand Up @@ -422,4 +432,9 @@ defmodule Eml.HTML.Parser do
end
defp trim_whitespace({ :cdata, noop }, _, _, _), do: noop
defp trim_whitespace(noop, _, _, _), do: noop

# Resolves the effective `:trim_whitespace` setting: an explicit entry in
# `opts` wins, otherwise the `:eml` application environment is consulted,
# falling back to `true`.
defp get_trim_whitespace_opt(opts) when is_list(opts) do
  case Keyword.fetch(opts, :trim_whitespace) do
    { :ok, explicit } -> explicit
    :error -> Application.get_env(:eml, :trim_whitespace, true)
  end
end
end