diff --git a/kirsche/command.py b/kirsche/command.py index 760eb0a..416984c 100644 --- a/kirsche/command.py +++ b/kirsche/command.py @@ -3,6 +3,7 @@ import click from loguru import logger +from pyecharts.charts.base import default from kirsche.download import list_unique_ids, download_metadata from kirsche.connect import ( append_connections, @@ -12,6 +13,7 @@ from kirsche.dataset import DataViews from kirsche.utils.io import load_json from kirsche.utils.bib import load_bib +from kirsche.visalize import make_chart, PaperGraph, visualize logger.remove() @@ -69,7 +71,7 @@ def _metadata(paper_id, bib_file, metadata_file, sleep_time): def kirsche(ctx): if ctx.invoked_subcommand is None: click.echo("Hello {}".format(os.environ.get("USER", ""))) - click.echo("Welcome to Kirsche.") + click.echo("Welcome to Kirsche. Use kirsche --help for help.") else: pass @@ -78,7 +80,7 @@ def kirsche(ctx): @click.option("--paper_id", "-p", help="Paper ID", multiple=True) @click.option("--bib_file", "-b", type=click.Path(exists=True), help="Bib file path") @click.option("--metadata_file", "-m", help="Target data file path") -@click.option("--sleep_time", "-s", default=1, help="Sleep time between requests") +@click.option("--sleep_time", "-st", default=1, help="Sleep time between requests") def metadata(paper_id, bib_file, metadata_file, sleep_time): """Download paper data from service provides (e.g., SemanticScholar). @@ -132,7 +134,7 @@ def connections_from_metadata(metadata_file, connected_papers_file): help="path to data file with paper metadata", ) @click.option("--connected_papers_file", "-c", help="path to save enhanced data file") -@click.option("--sleep_time", "-s", default=1, help="Sleep time between requests") +@click.option("--sleep_time", "-st", default=1, help="Sleep time between requests") def connections(paper_id, bib_file, metadata_file, connected_papers_file, sleep_time): """Establish connections between the list of papers, either from a list of DOIs, bib file, or from download metadata file. @@ -184,5 +186,73 @@ def connections(paper_id, bib_file, metadata_file, connected_papers_file, sleep_ click.echo(dv.json_simple) +@kirsche.command() +@click.option( + "--source_paper_id", "-sp", required=False, help="Source: Paper ID", multiple=True +) +@click.option( + "--source_bib_file", + "-sb", + required=False, + type=click.Path(exists=True), + help="Source: Bib file path", +) +@click.option( + "--source_metadata_file", + "-sm", + required=False, + type=click.Path(exists=True), + help="Source: path to data file with paper metadata", +) +@click.option( + "--source_connected_papers_file", + "-sc", + required=False, + help="Source: path to save enhanced data file", +) +@click.option("--title", default="Kirsche: Paper Graph", help="title of the chart") +@click.option( + "--target_html_path", "-th", required=True, help="Target: path to html file" +) +@click.option("--sleep_time", "-st", default=1, help="Sleep time between requests") +def visualization( + source_paper_id, + source_bib_file, + source_metadata_file, + source_connected_papers_file, + title, + target_html_path, + sleep_time, +): + """ """ + if source_connected_papers_file: + connected_papers = load_json(source_connected_papers_file) + else: + click.secho(f"Retrieving paper metadata...") + if not source_metadata_file: + if source_bib_file: + logger.debug(f"Using bib file: {source_bib_file}") + records = _metadata(source_paper_id, source_bib_file, None, sleep_time) + else: + records = load_json(source_metadata_file) + click.secho(f" Retrieved {len(records)} records.") + + click.secho(f"Connecting papers...") + connected_papers = append_connections(records) + click.secho(f" Connected papers...") + + # Filter out unnecessary keys in the dictionary + click.secho(f"Filtering and saving data...") + connected_papers = save_connected_papers(connected_papers) + click.secho(f" Done...") + + g = PaperGraph(connected_papers, title=title) + nodes = g.nodes + edges = g.edges + + click.secho(f"Saving html file...") + visualize(nodes, edges, g.title, target_html_path) + + if __name__ == "__main__": pass diff --git a/kirsche/utils/graph.py b/kirsche/utils/graph.py index ddd79d2..58ec911 100644 --- a/kirsche/utils/graph.py +++ b/kirsche/utils/graph.py @@ -2,16 +2,21 @@ class PaperGraph: - """A graph object to hold graphs""" + """A graph object to hold graphs - def __init__(self, paper_connections): + :param paper_connections: a list of dictionaries that specifies the connections between papers + :param title: the title of the graph + """ + + def __init__(self, paper_connections: list, title: str = None): if not isinstance(paper_connections, list): raise TypeError("The connections paper_connections must be a dictionary") self.nodes, self.edges = self._extract_nodes_and_edges(paper_connections) + self.title = title - def _calculate_node(self, node, schema): + def _calculate_node(self, node: dict, schema: dict): """calculate the node""" simplified_node = {} for k, v in schema.items(): @@ -23,30 +28,29 @@ def _calculate_node(self, node, schema): return simplified_node - def _extract_nodes_and_edges(self, connections, node_schema=None, edge_schema=None): - """extract nodes and edges from connections""" + def _extract_nodes_and_edges( + self, connections: list, node_schema: dict = None, edge_schema: dict = None + ): + """extract nodes and edges from connections + + :param connections: a list of dictionaries that specifies the connections between papers + :param node_schema: a dictionary that specifies how the nodes are extracted + :param edge_schema: a dictionary that specifies how the edges are built + """ if node_schema is None: node_schema = { "name": { "key": "title", - # "key": "doi", - "default": "No Title" - }, - "id": { - # "key": "title" - "key": "doi" + "default": "No Title", }, + "id": {"key": "doi"}, "symbolSize": { "key": "numCitedBy", - "transform": lambda x: 5*math.log(x+2) - }, - "x": { - "key": "year" - }, - "y": { - "key": "numCiting" + "transform": lambda x: 5 * math.log(x + 2), }, + "x": {"key": "year"}, + "y": {"key": "numCiting"}, } if edge_schema is None: @@ -70,8 +74,7 @@ def _extract_nodes_and_edges(self, connections, node_schema=None, edge_schema=No return nodes, edges def __str__(self): - return f"nodes: {self.nodes}\nedges: {self.edges}" - + return f"Graph: {self.title}\n nodes: {self.nodes}\n edges: {self.edges}" if __name__ == "__main__": diff --git a/kirsche/visalize.py b/kirsche/visalize.py index 83f7e1b..b3463f9 100644 --- a/kirsche/visalize.py +++ b/kirsche/visalize.py @@ -1,11 +1,40 @@ +from pathlib import Path +from typing import Union + import pyecharts.options as opts from pyecharts.charts import Graph +from kirsche.utils.graph import PaperGraph +from kirsche.utils.io import load_json + + +def load_graph(connections_json: Union[str, Path], title: str) -> PaperGraph: + """Load json file that contains the paper connection information, and build a graph using it. + + :param connections_json: json file that contains the paper connection information + :param title: title of the graph which will be shown in the top of the chart + """ + + data = load_json(connections_json) + if data: + g = PaperGraph(data, title=title) + else: + raise ValueError(f"No data in json file: {connections_json}!") -def visualize(nodes, edges, target, title): - """Generate interactive graphs""" + return g + +def visualize(nodes: list, edges: list, title: str, target: Union[str, Path]) -> None: + """Generate interactive graphs + + :param nodes: nodes of the graph + :param edges: edges of the graph + :param title: title of the graph which will be shown in the top of the chart + :param target: target file path + """ + + # Build the graph and export it to html file ( Graph(init_opts=opts.InitOpts(width="1600px", height="800px")) .add( @@ -21,3 +50,30 @@ def visualize(nodes, edges, target, title): .set_global_opts(title_opts=opts.TitleOpts(title=title)) .render(target) ) + + +def make_chart( + connections_json: Union[Path, str], target: Union[Path, str], title: str +) -> None: + """Generate interactive graphs + + :param connections_json: json file that contains the paper connection information + :param target: target file path + :param title: title of the graph which will be shown in the top of the chart + """ + + g = load_graph(connections_json, title) + + nodes = g.nodes + edges = g.edges + + visualize(nodes, edges, g.title, target) + + +if __name__ == "__main__": + import json + + paper_connections = "tests/data/io/test.json" + target = "tests/data/visualize/test.html" + + make_chart(paper_connections, target, "This is an Experiment") diff --git a/tests/data/visualize/test.html b/tests/data/visualize/test.html new file mode 100644 index 0000000..236bf99 --- /dev/null +++ b/tests/data/visualize/test.html @@ -0,0 +1,894 @@ + + + + + Awesome-pyecharts + + + + +
+ + + diff --git a/tests/data/visualize/test_32.html b/tests/data/visualize/test_32.html new file mode 100644 index 0000000..924ed43 --- /dev/null +++ b/tests/data/visualize/test_32.html @@ -0,0 +1,894 @@ + + + + + Awesome-pyecharts + + + + +
+ + + diff --git a/tests/data/visualize/test__connection_enhanced.html b/tests/data/visualize/test__connection_enhanced.html new file mode 100644 index 0000000..4cf617a --- /dev/null +++ b/tests/data/visualize/test__connection_enhanced.html @@ -0,0 +1,131 @@ + + + + + Awesome-pyecharts + + + + +
+ + +