Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions api_routes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import os
import subprocess
from collections import defaultdict
from datetime import datetime
from pathlib import Path

import langcheck
import pytz
Expand Down Expand Up @@ -84,6 +86,38 @@ def logs():
return jsonify(logs=db.get_chatlogs_and_metrics(per_page, offset))


def _check_database_name(database_name):
    '''Validates a database file name supplied by the client.

    Returns a ``(database_path, error_messages)`` tuple. ``database_path`` is
    the path of the database file relative to the working directory, or None
    if the name is rejected. ``error_messages`` is a (possibly empty) list of
    human-readable messages explaining why the name was rejected.
    '''
    if not database_name:
        return None, ['A database file name is required']

    database_path = Path('db/' + database_name)

    # The name comes straight from the query string, so it may contain '..'
    # segments (or point at a symlink) that escape db/. Resolve it and only
    # accept files that really live inside the db/ directory.
    db_directory = Path('db').resolve()
    if db_directory not in database_path.resolve().parents:
        return None, [f'{database_name} does not exist in the db/ directory']

    if not database_path.exists():
        return None, [f'{database_name} does not exist in the db/ directory']
    if not database_path.is_file():
        return None, [f'{database_name} is not a file']
    return database_path, []


@api_routes_blueprint.route('/api/logs_comparison', methods=['GET'])
def logs_comparison():
    '''Returns one page of chat logs from two databases, side by side.

    Query parameters:
        page: 1-based page number (defaults to 1; invalid or non-positive
            values fall back to the first page).
        database_a: file name of Database A inside the db/ directory.
        database_b: file name of Database B inside the db/ directory.

    Responds with ``{"success": true, "logs": [...]}`` on success, or with
    ``{"success": false, "errors": {"database-a": [...], "database-b": [...]}}``
    when one or both database names are invalid. Only the keys of the invalid
    databases are present in "errors".
    '''
    # `type=int` makes werkzeug return the default instead of raising when
    # the value is not a valid integer. Clamp so the offset is never negative.
    page = max(request.args.get('page', 1, type=int), 1)

    database_a_path, database_a_errors = _check_database_name(
        request.args.get('database_a'))
    database_b_path, database_b_errors = _check_database_name(
        request.args.get('database_b'))

    errors = {}
    if database_a_errors:
        errors['database-a'] = database_a_errors
    if database_b_errors:
        errors['database-b'] = database_b_errors
    if errors:
        return jsonify(success=False, errors=errors)

    per_page = 10
    offset = (page - 1) * per_page
    return jsonify(success=True,
                   logs=db.get_comparison_chatlogs_and_metrics(
                       str(database_a_path), str(database_b_path), per_page,
                       offset))


@api_routes_blueprint.route('/api/metrics/<log_id>', methods=['GET'])
def metrics_endpoint(log_id):
metrics_data = db.get_metrics_by_log_id(log_id)
Expand Down
115 changes: 109 additions & 6 deletions database.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,41 +4,43 @@
DATABASE_URL = 'db/langcheckchat.db'


def initialize_db(database_url: str = DATABASE_URL):
    '''Creates the chat log and metric tables in the SQLite database.

    The table definitions are read from db/chat_log_schema.sql and
    db/metric_schema.sql and executed against `database_url` (the database
    file is created by SQLite if it does not exist yet).
    '''
    from contextlib import closing

    with open('db/chat_log_schema.sql', 'r', encoding='utf-8') as file:
        chat_log_schema_script = file.read()
    with open('db/metric_schema.sql', 'r', encoding='utf-8') as file:
        metric_schema_script = file.read()

    # `with conn` only commits / rolls back the transaction; it does not
    # close the connection, so wrap it in closing() to release the handle.
    with closing(sqlite3.connect(database_url)) as conn:
        with conn:
            cursor = conn.cursor()
            cursor.executescript(chat_log_schema_script)
            cursor.executescript(metric_schema_script)
            conn.commit()


def _select_data(query: str,
                 params: Optional[Dict[str, Any]] = None,
                 database_url: str = DATABASE_URL) -> List[sqlite3.Row]:
    '''Runs a SQL SELECT query on the SQLite database.

    `params` holds the values for the named placeholders (e.g. ``:limit``)
    used in `query`. `database_url` is the path of the SQLite file to query.
    Returns all matching rows as `sqlite3.Row` objects, so columns can be
    accessed by name.
    '''
    from contextlib import closing

    if params is None:
        params = {}

    # `with conn` only manages the transaction; closing() makes sure the
    # connection itself is released once the rows have been fetched.
    with closing(sqlite3.connect(database_url)) as conn:
        with conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()
            return cursor.execute(query, params).fetchall()


def _edit_data(query: str,
params: Optional[List[Any]] = None) -> Optional[int]:
params: Optional[List[Any]] = None,
database_url: str = DATABASE_URL) -> Optional[int]:
'''Runs a SQL INSERT or UPDATE query on the SQLite database.
For a INSERT query, it returns the last inserted row id (lastrowid).
'''
if params is None:
params = []

with sqlite3.connect(DATABASE_URL) as conn:
with sqlite3.connect(database_url) as conn:
conn.row_factory = sqlite3.Row
cursor = conn.cursor()
cursor.execute(query, params)
Expand Down Expand Up @@ -109,6 +111,107 @@ def get_chatlogs_and_metrics(limit: int, offset: int) -> List[dict]:
return list(id_to_logs.values())


def get_comparison_chatlogs_and_metrics(database_a_url: str,
                                        database_b_url: str, limit: int,
                                        offset: int) -> List[dict]:
    '''
    Returns one page of chat logs and metrics from Database A, each combined
    with the response, source and metrics of the chat log(s) in Database B
    that have exactly the same request text.

    The page is selected from Database A only (`limit` logs starting at
    `offset`, newest first). Database B is read in full and matched by
    request.

    The result is a list with one dictionary per chat log of Database A. Every
    chat_log column of Database A is included with an "_a" suffix:
    {
        "id_a": <chat_log_id_a>,
        "request_a": "...",
        "response_a": "...",
        "response_b": "...",
        "reference_a": "...",
        "timestamp_a": "<timestamp>",
        "source_a": "..",
        "source_b": "..",
        "language_a": "<language>",
        "status_a": "done",
        "metrics_a": {
            "ai_disclaimer_similarity": {"metric_value": <metric_value>, "explanation": "..."},
            "factual_consistency_openai": {"metric_value": <metric_value>, "explanation": "..."},
            ...
        },
        "metrics_b": {
            "ai_disclaimer_similarity": {"metric_value": <metric_value>, "explanation": "..."},
            "factual_consistency_openai": {"metric_value": <metric_value>, "explanation": "..."},
            ...
        }
    }
    "response_b" and "source_b" are only present when Database B contains a
    chat log with the same request; "metrics_b" is an empty dictionary
    otherwise.
    '''
    metric_columns = ('metric_name', 'metric_value', 'explanation')

    query_a = '''
        SELECT chat_log.*, metric.metric_name, metric.metric_value, metric.explanation
        FROM (
            SELECT * FROM chat_log
            ORDER BY timestamp DESC
            LIMIT :limit OFFSET :offset
        ) AS chat_log
        LEFT JOIN metric ON chat_log.id = metric.log_id
    '''
    a_rows = _select_data(query_a,
                          params={
                              'limit': limit,
                              'offset': offset
                          },
                          database_url=database_a_url)

    query_b = '''
        SELECT chat_log.*, metric.metric_name, metric.metric_value, metric.explanation
        FROM (
            SELECT * FROM chat_log
            ORDER BY timestamp DESC
        ) AS chat_log
        LEFT JOIN metric ON chat_log.id = metric.log_id
    '''
    b_rows = _select_data(query_b, database_url=database_b_url)

    # Each row corresponds to a single metric (or to a chat log without any
    # metric, in which case the LEFT JOIN leaves the metric columns NULL). We
    # want to group together all the metrics for a single chat log.
    logs_by_id = {}
    # Several logs in Database A may share the same request text, so keep
    # every matching id rather than only the last one seen.
    ids_by_request = {}
    for row in a_rows:
        log_id = row['id']
        if log_id not in logs_by_id:
            # Append '_a' to the keys to distinguish them from the keys
            # coming from Database B
            chat_log = {
                f'{column}_a': row[column]
                for column in row.keys() if column not in metric_columns
            }
            chat_log['metrics_a'] = {}
            chat_log['metrics_b'] = {}
            logs_by_id[log_id] = chat_log
            ids_by_request.setdefault(row['request'], []).append(log_id)

        # Skip the placeholder row of a chat log that has no metrics, so that
        # no None ("null" in JSON) metric name ends up in the result.
        if row['metric_name'] is not None:
            logs_by_id[log_id]['metrics_a'][row['metric_name']] = {
                'metric_value': row['metric_value'],
                'explanation': row['explanation']
            }

    for row in b_rows:
        # Rows whose request does not match any request of the current page
        # of Database A are ignored.
        for log_id in ids_by_request.get(row['request'], ()):
            combined_log = logs_by_id[log_id]

            # These may already have been set by a previous metric row of the
            # same chat log, in which case the values are identical. If
            # Database B holds several chat logs with the same request, the
            # row processed last wins.
            combined_log['response_b'] = row['response']
            combined_log['source_b'] = row['source']

            if row['metric_name'] is not None:
                combined_log['metrics_b'][row['metric_name']] = {
                    'metric_value': row['metric_value'],
                    'explanation': row['explanation']
                }
    return list(logs_by_id.values())


def insert_chatlog(data: Dict[str, Any]) -> int:
columns = ', '.join(data.keys())
placeholders = ', '.join(['?' for _ in data.keys()])
Expand Down
Binary file added db/evaluation_results_a.db
Binary file not shown.
Binary file added db/evaluation_results_b.db
Binary file not shown.
84 changes: 84 additions & 0 deletions static/logs_comparison.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
<!DOCTYPE html>
<html lang="en">

<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Q&A Logs</title>

<link rel="icon" type="image/png" href="/static/favicon.png" />
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link
href="https://fonts.googleapis.com/css2?family=IBM+Plex+Mono&family=IBM+Plex+Sans:wght@400;500;600;700&display=swap"
rel="stylesheet">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@4.6.2/dist/css/bootstrap.min.css"
integrity="sha384-xOolHFLEh07PJGoPkLv1IbcEPTNtaed2xpHsD9ESMhqIYd0nLMwNLD69Npy4HI+N" crossorigin="anonymous">
<link rel="stylesheet" href="/static/style.css">
</head>

<body>
<div class="container-fluid mt-3">
<h1 class="mt-3 mb-3 text-center">Comparison Logs</h1>

<form id="database-names-form" class="form-group row justify-content-center">
<div class="col-5">
<div class="input-group shadow-sm mb-2">
<div class="input-group-prepend">
<div class="input-group-text font-weight-bold">Database A</div>
</div>
<input data-error-for="database-a" type="text" class="form-control" id="database-a" placeholder="E.g. evaluation_results_a.db" value="evaluation_results_a.db" required>
</div>
<div data-error-message-for="database-a" class="text-danger database-name-error-message"></div>
</div>
<div class="col-5">
<div class="input-group shadow-sm mb-2">
<div class="input-group-prepend">
<div class="input-group-text font-weight-bold">Database B</div>
</div>
<input data-error-for="database-b" type="text" class="form-control" id="database-b" placeholder="E.g. evaluation_results_b.db" value="evaluation_results_b.db" required>
</div>
<div data-error-message-for="database-b" class="text-danger database-name-error-message"></div>
</div>
<div class="col-auto">
<button id="database-names-btn" type="submit" class="btn btn-primary mb-2">Submit</button>
</div>
</form>

<table id="qa-table" class="table">
<thead class="thead-dark">
<tr>
<th scope="col" class="w-20">User Message</th>
<th scope="col" class="w-20">Bot Message A</th>
<th scope="col" class="w-20">Bot Message B</th>
<th scope="col" class="w-10">Reference</th>
<th scope="col" class="w-25">Metric A</th>
<th scope="col" class="w-25">Metric B</th>
<th scope="col" class="w-25">Source A</th>
<th scope="col" class="w-25">Source B</th>
</tr>
</thead>
<tbody>
<!-- Rows inserted by JavaScript -->
</tbody>
</table>

<div class="pagination-container mt-3 mb-3">
<button id="prevButton" class="btn btn-primary">Previous</button>
<span id="pageIndicator" class="mx-3"></span>
<button id="nextButton" class="btn btn-primary">Next</button>
</div>

<div class="mt-3">
<a href="/">Back to Chat</a>
</div>
</div>
<script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
<script src="/static/logs_comparison.js"></script>
<script src="/static/utils.js"></script>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@4.6.2/dist/js/bootstrap.bundle.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/feather-icons/4.29.1/feather.min.js"></script>
<script>feather.replace()</script>
</body>

</html>
Loading