Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
13ac00b
refactor versa with oo update -> a major update
ftshijt Jun 16, 2025
58eb805
Merge branch 'main' into refactor
ftshijt Jun 19, 2025
8d2f6f0
Merge branch 'main' into refactor
ftshijt Jun 19, 2025
8796810
add asvspoof.py
ftshijt Jun 30, 2025
53c3e0a
Merge branch 'refactor' of https://github.com/ftshijt/versa into refa…
ftshijt Jun 30, 2025
7e95ee8
update discrete speech / chroma_alignment
ftshijt Jun 30, 2025
b7b9dd4
update test function and versa with black and emo_vad
ftshijt Jun 30, 2025
dcc1822
Merge branch 'main' into refactor
ftshijt Jun 30, 2025
8ff163a
update emo_similarity
ftshijt Jun 30, 2025
78894ee
fix metric list and set setup.py
ftshijt Jun 30, 2025
e5e10bf
fix setup.py
ftshijt Jun 30, 2025
892a13b
fix scorer shared for all cases
ftshijt Jul 5, 2025
ce9f828
update code multiple new metrics
ftshijt Jul 5, 2025
e95cd4b
fix versa/test for test functions
ftshijt Jul 5, 2025
f4799fd
add pam fixed
ftshijt Jul 5, 2025
03ccbda
add pesq
ftshijt Jul 5, 2025
20e155d
Migrate base metrics to OO interface
ftshijt Apr 29, 2026
b6f50f1
Migrate VAD metric to OO interface
ftshijt Apr 29, 2026
fcdd9af
Migrate additional utterance metrics
ftshijt Apr 29, 2026
61cc53f
Fix metric migration real setup
ftshijt Apr 29, 2026
e7d494b
Merge pull request #1 from wavlab-speech/codex/pr-37-refactor
ftshijt Apr 29, 2026
4e6913a
Restore legacy metric support
ftshijt Apr 30, 2026
1454f78
Restore legacy scorer compatibility
ftshijt May 5, 2026
404fc77
Use local cache for ESPnet metrics
ftshijt May 5, 2026
bc32cbb
Fix legacy metric setup paths
ftshijt May 5, 2026
7a1ee6f
Route Hugging Face metric caches locally
ftshijt May 5, 2026
feea48d
Fix legacy metric installers and pipeline baselines
ftshijt May 5, 2026
2a7d7ad
Clean up metric cache installers
ftshijt May 5, 2026
200bb4a
Clean up singer identity cache installer
ftshijt May 5, 2026
22410f4
Merge pull request #2 from wavlab-speech/codex/pr-37-refactor
ftshijt May 5, 2026
19f270a
Merge main metric additions into refactor interface
ftshijt May 5, 2026
76b9da9
Avoid WVMOS import-time downloads
ftshijt May 5, 2026
b788c1e
Fix PR 37 CI failures
ftshijt May 5, 2026
f2f4c81
Merge upstream main into refactor
ftshijt May 6, 2026
bcec446
Make README example commands runnable
ftshijt May 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 21 additions & 17 deletions test/test_pipeline/test_asr_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,21 @@

import yaml

from versa.scorer_shared import (
find_files,
list_scoring,
load_score_modules,
load_summary,
)
from versa.scorer_shared import VersaScorer, compute_summary
from versa.utils_shared import find_files
from versa.definition import MetricRegistry
from versa.utterance_metrics.asr_matching import register_asr_match_metric

TEST_INFO = {
"asr_match_error_rate": 0.0,
}
TEST_INFO = {"asr_match_error_rate": 0.0}


def info_update():

# find files
if os.path.isdir("test/test_samples/test2"):
gen_files = find_files("test/test_samples/test2")

# find reference file
gt_files = None
if os.path.isdir("test/test_samples/test1"):
gt_files = find_files("test/test_samples/test1")

Expand All @@ -31,25 +27,33 @@ def info_update():
with open("egs/separate_metrics/asr_match.yaml", "r", encoding="utf-8") as f:
score_config = yaml.full_load(f)

score_modules = load_score_modules(
# Create registry and register ASR-Match metric
registry = MetricRegistry()
register_asr_match_metric(registry)

# Initialize VersaScorer with the populated registry
scorer = VersaScorer(registry)

# Load metrics using the new API
metric_suite = scorer.load_metrics(
score_config,
use_gt=(True if gt_files is not None else False),
use_gpu=False,
)

assert len(score_config) > 0, "no scoring function is provided"

score_info = list_scoring(
gen_files, score_modules, gt_files, output_file=None, io="soundfile"
# Score utterances using the new API
score_info = scorer.score_utterances(
gen_files, metric_suite, gt_files, output_file=None, io="soundfile"
)
summary = load_summary(score_info)
print("Summary: {}".format(load_summary(score_info)), flush=True)

summary = compute_summary(score_info)
print("Summary: {}".format(summary), flush=True)

for key in summary:
if math.isinf(TEST_INFO[key]) and math.isinf(summary[key]):
# both values are infinite (e.g. for sir), so treat them as matching
continue
# the plcmos score is non-deterministic, so it is excluded from the tolerance check
if abs(TEST_INFO[key] - summary[key]) > 1e-4 and key != "plcmos":
raise ValueError(
"Value issue in the test case, might be some issue in scorer {}".format(
Expand Down
42 changes: 23 additions & 19 deletions test/test_pipeline/test_srmr.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,12 @@

import yaml

from versa.scorer_shared import (
find_files,
list_scoring,
load_score_modules,
load_summary,
)
from versa.scorer_shared import VersaScorer, compute_summary
from versa.utils_shared import find_files
from versa.definition import MetricRegistry
from versa.utterance_metrics.srmr import register_srmr_metric

TEST_INFO = {
"srmr": 0.6123816687905584,
}
TEST_INFO = {"srmr": 0.6123816687905584}


def info_update():
Expand All @@ -23,6 +19,7 @@ def info_update():
gen_files = find_files("test/test_samples/test2")

# find reference file
gt_files = None
if os.path.isdir("test/test_samples/test1"):
gt_files = find_files("test/test_samples/test1")

Expand All @@ -31,26 +28,33 @@ def info_update():
with open("egs/separate_metrics/srmr.yaml", "r", encoding="utf-8") as f:
score_config = yaml.full_load(f)

score_modules = load_score_modules(
# Create registry and register SRMR metric
registry = MetricRegistry()
register_srmr_metric(registry)

# Initialize VersaScorer with the populated registry
scorer = VersaScorer(registry)

# Load metrics using the new API
metric_suite = scorer.load_metrics(
score_config,
use_gt=(True if gt_files is not None else False),
use_gpu=False,
)

assert len(score_config) > 0, "no scoring function is provided"

score_info = list_scoring(
gen_files, score_modules, gt_files, output_file=None, io="soundfile"
# Score utterances using the new API
score_info = scorer.score_utterances(
gen_files, metric_suite, gt_files,
output_file=None, io="soundfile"
)
summary = load_summary(score_info)
print("Summary: {}".format(load_summary(score_info)), flush=True)

summary = compute_summary(score_info)
print("Summary: {}".format(summary), flush=True)

for key in summary:
if math.isinf(TEST_INFO[key]) and math.isinf(summary[key]):
# both values are infinite (e.g. for sir), so treat them as matching
continue
# the plc mos is non-deterministic
if abs(TEST_INFO[key] - summary[key]) > 1e-4 and key != "plcmos":
if abs(TEST_INFO[key] - summary[key]) > 1e-4:
raise ValueError(
"Value issue in the test case, might be some issue in scorer {}".format(
key
Expand Down
4 changes: 2 additions & 2 deletions versa/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
whisper_levenshtein_metric,
whisper_wer_setup,
)
from versa.utterance_metrics.asr_matching import asr_match_metric, asr_match_setup
from versa.utterance_metrics.asr_matching import ASRMatchMetric, register_asr_match_metric
from versa.utterance_metrics.audiobox_aesthetics_score import (
audiobox_aesthetics_score,
audiobox_aesthetics_setup,
Expand Down Expand Up @@ -102,4 +102,4 @@
speaking_rate_model_setup,
)
from versa.utterance_metrics.squim import squim_metric, squim_metric_no_ref
from versa.utterance_metrics.srmr import srmr_metric
from versa.utterance_metrics.srmr import SRMRMetric, register_srmr_metric
53 changes: 30 additions & 23 deletions versa/bin/scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,8 @@

from versa.scorer_shared import (
audio_loader_setup,
corpus_scoring,
list_scoring,
load_corpus_modules,
load_score_modules,
load_summary,
VersaScorer,
compute_summary,
)


Expand Down Expand Up @@ -141,47 +138,57 @@ def main():
with open(args.score_config, "r", encoding="utf-8") as f:
score_config = yaml.full_load(f)

score_modules = load_score_modules(
# Initialize VersaScorer
scorer = VersaScorer()

# Load utterance-level metrics
utterance_metrics = scorer.load_metrics(
score_config,
use_gt=(True if gt_files is not None else False),
use_gt_text=(True if text_info is not None else False),
use_gpu=args.use_gpu,
)

if len(score_modules) > 0:
score_info = list_scoring(
# Perform utterance-level scoring
if len(utterance_metrics.metrics) > 0:
score_info = scorer.score_utterances(
gen_files,
score_modules,
utterance_metrics,
gt_files,
text_info,
output_file=args.output_file,
io=args.io,
)
logging.info("Summary: {}".format(load_summary(score_info)))
logging.info("Summary: {}".format(compute_summary(score_info)))
else:
logging.info("No utterance-level scoring function is provided.")

corpus_score_modules = load_corpus_modules(
# Load corpus-level metrics (distributional metrics)
corpus_metrics = scorer.load_metrics(
score_config,
use_gt=(True if gt_files is not None else False),
use_gt_text=(True if text_info is not None else False),
use_gpu=args.use_gpu,
cache_folder=args.cache_folder,
io=args.io,
)
assert (
len(corpus_score_modules) > 0 or len(score_modules) > 0
), "no scoring function is provided"
if len(corpus_score_modules) > 0:
corpus_score_info = corpus_scoring(
args.pred,
corpus_score_modules,
args.gt,

# Filter for corpus-level metrics and perform corpus scoring
from versa.definition import MetricCategory
corpus_suite = corpus_metrics.filter_by_category(MetricCategory.DISTRIBUTIONAL)
if len(corpus_suite.metrics) > 0:
corpus_score_info = scorer.score_corpus(
gen_files,
corpus_suite,
gt_files,
text_info,
output_file=args.output_file + ".corpus",
output_file=args.output_file + ".corpus" if args.output_file else None,
)
logging.info("Corpus Summary: {}".format(corpus_score_info))
else:
logging.info("No corpus-level scoring function is provided.")
return

# Ensure at least one scoring function is provided
if len(utterance_metrics.metrics) == 0 and len(corpus_suite.metrics) == 0:
raise ValueError("No scoring function is provided")


if __name__ == "__main__":
Expand Down
Loading
Loading