From 2c7365da095389c9ae7af93c2eabe494c7ba8ac5 Mon Sep 17 00:00:00 2001
From: davidmezzetti <561939+davidmezzetti@users.noreply.github.com>
Date: Thu, 22 Apr 2021 13:31:07 -0400
Subject: [PATCH] Use lxml to help generate clean HTML
---
examples/search.py | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/examples/search.py b/examples/search.py
index e9f2717..0d7b550 100644
--- a/examples/search.py
+++ b/examples/search.py
@@ -1,8 +1,8 @@
"""
Search a paperai index.
-Requires streamlit to be installed.
- pip install streamlit
+Requires streamlit and lxml to be installed.
+ pip install streamlit lxml
"""
import os
@@ -12,6 +12,8 @@
import pandas as pd
import streamlit as st
+from lxml.html.clean import clean_html
+
from paperai.models import Models
from paperai.query import Query
@@ -56,12 +58,11 @@ def search(self, query, topn, threshold):
# Print each result, sorted by max score descending
for uid in sorted(documents, key=lambda k: sum([x[0] for x in documents[k]]), reverse=True):
- cur.execute("SELECT Title, Published, Publication, Design, Size, Sample, Method, Entry, Id, Reference " +
+ cur.execute("SELECT Title, Published, Publication, Design, Size, Sample, Method, Entry, Id, Reference " +
"FROM articles WHERE id = ?", [uid])
article = cur.fetchone()
- matches = "\n".join([text for _, text in documents[uid]])
- matches = matches.replace("<", "<").replace(">", ">").replace("&", "&")
+ matches = "<br/>".join([text for _, text in documents[uid]])
title = "<a href='%s'>%s</a>" % (article[9], article[0])
@@ -94,7 +95,8 @@ def run(self):
st.markdown("<p>%d results</p>" % len(df), unsafe_allow_html=True)
if not df.empty:
- st.write(df[columns].to_html(escape=False, index=False), unsafe_allow_html=True)
+ html = df[columns].to_html(escape=False, index=False)
+ st.write(clean_html(html), unsafe_allow_html=True)
@st.cache(allow_output_mutation=True)
def create(path):
@@ -113,7 +115,7 @@ def create(path):
if len(sys.argv) <= 1 or not os.path.isdir(sys.argv[1]):
st.error("Path to embeddings index not present or invalid")
- else:
+ else:
st.set_page_config(layout="wide")
# Create and run application