Skip to content

Commit

Permalink
added visualizations
Browse files Browse the repository at this point in the history
  • Loading branch information
emptymalei committed Sep 21, 2021
1 parent eded26f commit 02ae2e1
Show file tree
Hide file tree
Showing 6 changed files with 2,073 additions and 25 deletions.
76 changes: 73 additions & 3 deletions kirsche/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import click
from loguru import logger
from pyecharts.charts.base import default
from kirsche.download import list_unique_ids, download_metadata
from kirsche.connect import (
append_connections,
Expand All @@ -12,6 +13,7 @@
from kirsche.dataset import DataViews
from kirsche.utils.io import load_json
from kirsche.utils.bib import load_bib
from kirsche.visalize import make_chart, PaperGraph, visualize


logger.remove()
Expand Down Expand Up @@ -69,7 +71,7 @@ def _metadata(paper_id, bib_file, metadata_file, sleep_time):
def kirsche(ctx):
if ctx.invoked_subcommand is None:
click.echo("Hello {}".format(os.environ.get("USER", "")))
click.echo("Welcome to Kirsche.")
click.echo("Welcome to Kirsche. Use kirsche --help for help.")
else:
pass

Expand All @@ -78,7 +80,7 @@ def kirsche(ctx):
@click.option("--paper_id", "-p", help="Paper ID", multiple=True)
@click.option("--bib_file", "-b", type=click.Path(exists=True), help="Bib file path")
@click.option("--metadata_file", "-m", help="Target data file path")
@click.option("--sleep_time", "-s", default=1, help="Sleep time between requests")
@click.option("--sleep_time", "-st", default=1, help="Sleep time between requests")
def metadata(paper_id, bib_file, metadata_file, sleep_time):
"""Download paper data from service provides (e.g., SemanticScholar).
Expand Down Expand Up @@ -132,7 +134,7 @@ def connections_from_metadata(metadata_file, connected_papers_file):
help="path to data file with paper metadata",
)
@click.option("--connected_papers_file", "-c", help="path to save enhanced data file")
@click.option("--sleep_time", "-s", default=1, help="Sleep time between requests")
@click.option("--sleep_time", "-st", default=1, help="Sleep time between requests")
def connections(paper_id, bib_file, metadata_file, connected_papers_file, sleep_time):
"""Establish connections between the list of papers, either from a list of DOIs, bib file, or from download metadata file.
Expand Down Expand Up @@ -184,5 +186,73 @@ def connections(paper_id, bib_file, metadata_file, connected_papers_file, sleep_
click.echo(dv.json_simple)


@kirsche.command()
@click.option(
"--source_paper_id", "-sp", required=False, help="Source: Paper ID", multiple=True
)
@click.option(
"--source_bib_file",
"-sb",
required=False,
type=click.Path(exists=True),
help="Source: Bib file path",
)
@click.option(
"--source_metadata_file",
"-sm",
required=False,
type=click.Path(exists=True),
help="Source: path to data file with paper metadata",
)
@click.option(
"--source_connected_papers_file",
"-sc",
required=False,
help="Source: path to save enhanced data file",
)
@click.option("--title", default="Kirsche: Paper Graph", help="title of the chart")
@click.option(
"--target_html_path", "-th", required=True, help="Target: path to html file"
)
@click.option("--sleep_time", "-st", default=1, help="Sleep time between requests")
def visualization(
source_paper_id,
source_bib_file,
source_metadata_file,
source_connected_papers_file,
title,
target_html_path,
sleep_time,
):
"""Build a graph of connected papers and render it to an HTML file.

The connection data is loaded from --source_connected_papers_file when that
option is given. Otherwise metadata records are read from
--source_metadata_file or obtained through `_metadata` (using the paper IDs
and/or the bib file), and the connections are computed with
`append_connections`.

The nodes and edges of the resulting `PaperGraph` are handed to `visualize`
together with the chart title and --target_html_path.
"""
# NOTE(review): indentation was lost in this listing, so the exact nesting of
# the branches below (in particular whether the filtering step also runs when
# a connections file is preloaded) cannot be confirmed here -- verify against
# the repository before relying on these comments.
if source_connected_papers_file:
# Connection data was supplied directly; load it instead of recomputing it.
connected_papers = load_json(source_connected_papers_file)
else:
click.secho(f"Retrieving paper metadata...")
if not source_metadata_file:
if source_bib_file:
logger.debug(f"Using bib file: {source_bib_file}")
# The third argument (metadata file) is passed as None here.
records = _metadata(source_paper_id, source_bib_file, None, sleep_time)
else:
# A metadata file was supplied; read the records from it.
records = load_json(source_metadata_file)
click.secho(f" Retrieved {len(records)} records.")

click.secho(f"Connecting papers...")
connected_papers = append_connections(records)
click.secho(f" Connected papers...")

# Filter out unnecessary keys in the dictionary
# The return value of save_connected_papers replaces connected_papers;
# presumably this is the filtered data -- confirm against its definition.
click.secho(f"Filtering and saving data...")
connected_papers = save_connected_papers(connected_papers)
click.secho(f" Done...")

# Build the graph object; its nodes/edges/title are what gets rendered.
g = PaperGraph(connected_papers, title=title)
nodes = g.nodes
edges = g.edges

click.secho(f"Saving html file...")
visualize(nodes, edges, g.title, target_html_path)


if __name__ == "__main__":
pass
43 changes: 23 additions & 20 deletions kirsche/utils/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,21 @@


class PaperGraph:
"""A graph object to hold graphs"""
"""A graph object to hold graphs
def __init__(self, paper_connections):
:param paper_connections: a list of dictionaries that specifies the connections between papers
:param title: the title of the graph
"""

def __init__(self, paper_connections: list, title: str = None):

if not isinstance(paper_connections, list):
raise TypeError("The connections paper_connections must be a dictionary")

self.nodes, self.edges = self._extract_nodes_and_edges(paper_connections)
self.title = title

def _calculate_node(self, node, schema):
def _calculate_node(self, node: dict, schema: dict):
"""calculate the node"""
simplified_node = {}
for k, v in schema.items():
Expand All @@ -23,30 +28,29 @@ def _calculate_node(self, node, schema):

return simplified_node

def _extract_nodes_and_edges(self, connections, node_schema=None, edge_schema=None):
"""extract nodes and edges from connections"""
def _extract_nodes_and_edges(
self, connections: list, node_schema: dict = None, edge_schema: dict = None
):
"""extract nodes and edges from connections
:param connections: a list of dictionaries that specifies the connections between papers
:param node_schema: a dictionary that specifies how the nodes are extracted
:param edge_schema: a dictionary that specifies how the edges are built
"""

if node_schema is None:
node_schema = {
"name": {
"key": "title",
# "key": "doi",
"default": "No Title"
},
"id": {
# "key": "title"
"key": "doi"
"default": "No Title",
},
"id": {"key": "doi"},
"symbolSize": {
"key": "numCitedBy",
"transform": lambda x: 5*math.log(x+2)
},
"x": {
"key": "year"
},
"y": {
"key": "numCiting"
"transform": lambda x: 5 * math.log(x + 2),
},
"x": {"key": "year"},
"y": {"key": "numCiting"},
}

if edge_schema is None:
Expand All @@ -70,8 +74,7 @@ def _extract_nodes_and_edges(self, connections, node_schema=None, edge_schema=No
return nodes, edges

def __str__(self):
return f"nodes: {self.nodes}\nedges: {self.edges}"

return f"Graph: {self.title}\n nodes: {self.nodes}\n edges: {self.edges}"


if __name__ == "__main__":
Expand Down
60 changes: 58 additions & 2 deletions kirsche/visalize.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,40 @@
from pathlib import Path
from typing import Union

import pyecharts.options as opts
from pyecharts.charts import Graph

from kirsche.utils.graph import PaperGraph
from kirsche.utils.io import load_json


def load_graph(connections_json: Union[str, Path], title: str) -> PaperGraph:
"""Load json file that contains the paper connection information, and build a graph using it.
:param connections_json: json file that contains the paper connection information
:param title: title of the graph which will be shown in the top of the chart
:return: a PaperGraph built from the loaded data, carrying the given title
:raises ValueError: if the data loaded from the json file is empty (falsy)
"""

data = load_json(connections_json)

# Only build the graph when the file actually yielded data.
if data:
g = PaperGraph(data, title=title)
else:
raise ValueError(f"No data in json file: {connections_json}!")

# NOTE(review): the next two lines appear to be removed lines shown by the diff
# view (the previous `visualize` header), not part of load_graph -- confirm.
def visualize(nodes, edges, target, title):
"""Generate interactive graphs"""
return g


def visualize(nodes: list, edges: list, title: str, target: Union[str, Path]) -> None:
"""Generate interactive graphs
:param nodes: nodes of the graph
:param edges: edges of the graph
:param title: title of the graph which will be shown in the top of the chart
:param target: target file path
"""

# Build the graph and export it to html file
(
Graph(init_opts=opts.InitOpts(width="1600px", height="800px"))
.add(
Expand All @@ -21,3 +50,30 @@ def visualize(nodes, edges, target, title):
.set_global_opts(title_opts=opts.TitleOpts(title=title))
.render(target)
)


def make_chart(
    connections_json: Union[Path, str], target: Union[Path, str], title: str
) -> None:
    """Build a paper graph from a connections json file and render it as a chart.

    :param connections_json: json file that contains the paper connection information
    :param target: target file path the chart is rendered to
    :param title: title of the graph which will be shown in the top of the chart
    """
    # load_graph raises if the json file holds no data, so `graph` is always populated here.
    graph = load_graph(connections_json, title)

    visualize(graph.nodes, graph.edges, graph.title, target)


if __name__ == "__main__":
# Manual run: build a chart from the json file under tests/data and write the
# resulting html to the path given in `target`.
import json  # NOTE(review): `json` is not referenced in this block -- possibly a leftover

paper_connections = "tests/data/io/test.json"
target = "tests/data/visualize/test.html"

make_chart(paper_connections, target, "This is an Experiment")
Loading

0 comments on commit 02ae2e1

Please sign in to comment.