diff --git a/kobo/apps/subsequences/schemas.py b/kobo/apps/subsequences/schemas.py
index 333ff79c32..2ef53865c4 100644
--- a/kobo/apps/subsequences/schemas.py
+++ b/kobo/apps/subsequences/schemas.py
@@ -29,7 +29,7 @@
 
 
 def validate_submission_supplement(asset: 'kpi.models.Asset', supplement: dict):
-    jsonschema.validate(get_submission_supplement_schema(asset), supplement)
+    jsonschema.validate(supplement, get_submission_supplement_schema(asset))
 
 
 def get_submission_supplement_schema(asset: 'kpi.models.Asset') -> dict:
diff --git a/kobo/apps/subsequences/tests/test_versioning.py b/kobo/apps/subsequences/tests/test_versioning.py
new file mode 100644
index 0000000000..b84efc1824
--- /dev/null
+++ b/kobo/apps/subsequences/tests/test_versioning.py
@@ -0,0 +1,287 @@
+from datetime import datetime, timedelta
+from unittest.mock import patch
+
+from ddt import data, ddt, unpack
+from django.test import TestCase
+from django.utils import timezone
+from freezegun import freeze_time
+
+from kobo.apps.subsequences.utils.versioning import (
+    _determine_source_transcript,
+    _new_revision_from_old,
+    _separate_manual_and_automatic_versions,
+    migrate_submission_supplementals,
+)
+
+
+@ddt
+class TestVersioning(TestCase):
+    """Tests for migrating old-format supplemental data to the new schema."""
+
+    def setUp(self):
+        super().setUp()
+        # old-format action dict; transcripts and translations share this shape
+        self.now = timezone.now().isoformat()
+        self.yesterday = (timezone.now() - timedelta(days=1)).isoformat()
+        self.action_dict = {
+            'dateCreated': None,
+            'dateModified': self.now,
+            'languageCode': 'en',
+            'revisions': [
+                {
+                    'dateModified': self.yesterday,
+                    'languageCode': 'en',
+                    'value': 'Old value',
+                }
+            ],
+            'value': 'Latest value',
+        }
+
+    def test_new_revision_from_old(self):
+        now = timezone.now()
+        old = {
+            'dateCreated': None,
+            'dateModified': '2025-10-22 17:09:38',
+            'languageCode': 'en',
+            'value': 'Transcribed new',
+        }
+        with freeze_time(now):
+            result = _new_revision_from_old(old)
+        assert result['value'] == old['value']
+        assert result['language'] == old['languageCode']
+        assert result['_dateCreated'] == old['dateModified']
+        assert result['_uuid'] is not None
+        assert result['_dateAccepted'] == now.isoformat()
+
+    def test_new_transcript_revision_from_old_returns_none_for_bad_data(self):
+        old = {'badly': 'formatted'}
+        assert _new_revision_from_old(old) is None
+
+    @data(True, False)
+    def test_separate_automatic_and_manual(self, latest_is_automated):
+        automated_transcription_value = (
+            'Latest value' if latest_is_automated else 'Old value'
+        )
+        manual, automated = _separate_manual_and_automatic_versions(
+            self.action_dict, 'en', automated_transcription_value
+        )
+        new_automated_transcript = automated[0]
+        new_manual_transcript = manual[0]
+        expected_most_recent_transcript = (
+            new_automated_transcript if latest_is_automated else new_manual_transcript
+        )
+        expected_old_transcript = (
+            new_manual_transcript if latest_is_automated else new_automated_transcript
+        )
+
+        assert expected_most_recent_transcript['_dateCreated'] == self.now
+        assert expected_most_recent_transcript['value'] == 'Latest value'
+        assert expected_old_transcript['_dateCreated'] == self.yesterday
+        assert expected_old_transcript['value'] == 'Old value'
+
+    def test_separate_automatic_and_manual_forces_language_if_given(self):
+        manual, automated = _separate_manual_and_automatic_versions(
+            self.action_dict, None, None, language='en'
+        )
+        for formatted_item in manual:
+            assert formatted_item['language'] == 'en'
+
+    def test_separate_automatic_and_manual_without_automatic_value(self):
+        manual, automatic = _separate_manual_and_automatic_versions(
+            self.action_dict, None, None
+        )
+        assert len(manual) == 2
+        assert len(automatic) == 0
+
+    @data(
+        # known language, date created, expected result uuid
+        # there is a transcript of the same language with an older date
+        ('de', '2024-12-31', 'uuid4'),
+        # there are transcripts of the same language but none older than the translation
+        ('de', '2023-01-01', 'uuid3'),
+        # there are no transcripts of the same language
+        ('fr', '2024-12-31', 'uuid1'),
+        # we don't know the source language but there are older transcripts
+        (None, '2024-12-31', 'uuid2'),
+        # we don't know the source language and there are no older transcripts
+        (None, '2023-01-01', 'uuid1'),
+    )
+    @unpack
+    def test_determine_source_transcription(
+        self, source_language, date_created, expected_source_uuid
+    ):
+        now = timezone.now()
+        one_day_ago = now - timedelta(days=1)
+        # django.utils.timezone.utc was removed in Django 5.0, so parse ISO strings
+        jan_1_2024 = datetime.fromisoformat('2024-01-01T00:00:00+00:00')
+        jan_2_2024 = datetime.fromisoformat('2024-01-02T00:00:00+00:00')
+        transcripts = [
+            {
+                '_uuid': 'uuid1',
+                '_dateCreated': now.isoformat(),
+                'language': 'en',
+                '_actionId': 'manual_transcription',
+            },
+            {
+                '_uuid': 'uuid2',
+                '_dateCreated': jan_1_2024.isoformat(),
+                'language': 'en',
+                '_actionId': 'automatic_transcription',
+            },
+            {
+                '_uuid': 'uuid3',
+                '_dateCreated': one_day_ago.isoformat(),
+                'language': 'de',
+                '_actionId': 'manual_transcription',
+            },
+            {
+                '_uuid': 'uuid4',
+                '_dateCreated': jan_2_2024.isoformat(),
+                'language': 'de',
+                '_actionId': 'automatic_transcription',
+            },
+        ]
+        translation_revision = {'_dateCreated': date_created}
+        source_transcript = _determine_source_transcript(
+            translation_revision, transcripts, automatic_source_language=source_language
+        )
+        assert source_transcript['_uuid'] == expected_source_uuid
+
+    # test the whole transformation process
+    def test_migrate_submission_extra_to_supplemental(self):
+        now = timezone.now()
+        one_day_ago = (now - timedelta(days=1)).isoformat()
+        one_year_ago = (now - timedelta(days=365)).isoformat()
+        a_year_and_a_day_ago = (now - timedelta(days=366)).isoformat()
+        old_version = {
+            'Audio_question': {
+                'googlets': {
+                    'languageCode': 'en',
+                    'regionCode': None,
+                    'status': 'complete',
+                    'value': 'This is audio that I am trying to ' 'transcribe.',
+                },
+                'googletx': {
+                    'languageCode': 'es',
+                    'source': 'en',
+                    'status': 'complete',
+                    'value': 'Esto es un audio que estoy ' 'intentando a transcribir.',
+                },
+                'transcript': {
+                    'dateCreated': one_day_ago,
+                    'dateModified': one_day_ago,
+                    'languageCode': 'en',
+                    'revisions': [
+                        {
+                            'dateModified': a_year_and_a_day_ago,
+                            'languageCode': 'en',
+                            'value': 'This is audio that I am trying to transcribe.',
+                        },
+                        {},
+                    ],
+                    'value': 'This is audio that I am trying to '
+                    'transcribe but i edited it.',
+                },
+                'translation': {
+                    'es': {
+                        'dateCreated': one_year_ago,
+                        'dateModified': now.isoformat(),
+                        'languageCode': 'es',
+                        'revisions': [
+                            {
+                                'dateModified': one_year_ago,
+                                'languageCode': 'es',
+                                'value': 'Esto es un '
+                                'audio que '
+                                'estoy '
+                                'intentando a '
+                                'transcribir.',
+                            }
+                        ],
+                        'value': 'Esto es un audio que '
+                        'estoy intentando '
+                        'transcribir pero yo lo edité',
+                    }
+                },
+            }
+        }
+
+        with patch(
+            'kobo.apps.subsequences.utils.versioning.uuid.uuid4',
+            side_effect=['uuid1', 'uuid2', 'uuid3', 'uuid4'],
+        ), freeze_time(now):
+            migrated = migrate_submission_supplementals(old_version)
+
+        new_version = {
+            '_version': '20250820',
+            'Audio_question': {
+                'automatic_google_transcription': {
+                    '_dateCreated': a_year_and_a_day_ago,
+                    '_dateModified': a_year_and_a_day_ago,
+                    '_versions': [
+                        {
+                            '_dateCreated': a_year_and_a_day_ago,
+                            '_dateAccepted': now.isoformat(),
+                            '_uuid': 'uuid2',
+                            'language': 'en',
+                            'value': 'This is audio that I am trying to transcribe.',
+                            'status': 'complete',
+                        }
+                    ]
+                },
+                'automatic_google_translation': {
+                    'es': {
+                        '_dateCreated': one_year_ago,
+                        '_dateModified': one_year_ago,
+                        '_versions': [
+                            {
+                                '_dateCreated': one_year_ago,
+                                '_dateAccepted': now.isoformat(),
+                                '_dependency': {
+                                    '_actionId': 'automatic_google_transcription',
+                                    '_uuid': 'uuid2',
+                                },
+                                '_uuid': 'uuid4',
+                                'language': 'es',
+                                'value': 'Esto es un audio que estoy intentando a'
+                                ' transcribir.',
+                                'status': 'complete',
+                            }
+                        ]
+                    }
+                },
+                'manual_transcription': {
+                    '_dateCreated': one_day_ago,
+                    '_dateModified': one_day_ago,
+                    '_versions': [
+                        {
+                            '_dateCreated': one_day_ago,
+                            '_dateAccepted': now.isoformat(),
+                            '_uuid': 'uuid1',
+                            'language': 'en',
+                            'value': 'This is audio that I am trying to '
+                                     'transcribe but i edited it.',
+                        }
+                    ]
+                },
+                'manual_translation': {
+                    'es': {
+                        '_dateCreated': now.isoformat(),
+                        '_dateModified': now.isoformat(),
+                        '_versions': [
+                            {
+                                '_dateCreated': now.isoformat(),
+                                '_dateAccepted': now.isoformat(),
+                                '_dependency': {'_actionId': 'manual_transcription',
+                                                '_uuid': 'uuid1'},
+                                '_uuid': 'uuid3',
+                                'language': 'es',
+                                'value': 'Esto es un audio que estoy intentando'
+                                ' transcribir pero yo lo edité',
+                            }
+                        ]
+                    }
+                },
+            }
+        }
+        assert migrated == new_version
diff --git a/kobo/apps/subsequences/utils/versioning.py b/kobo/apps/subsequences/utils/versioning.py
index aba7b21852..9d29d38b9c 100644
--- a/kobo/apps/subsequences/utils/versioning.py
+++ b/kobo/apps/subsequences/utils/versioning.py
@@ -1,3 +1,7 @@
+import uuid
+
+from django.utils import timezone
+
 from ..constants import SCHEMA_VERSIONS
 
 
@@ -6,10 +10,7 @@ def migrate_advanced_features(advanced_features: dict) -> dict | None:
     if advanced_features.get('_version') == SCHEMA_VERSIONS[0]:
         return
 
-    migrated_advanced_features = {
-        '_version': SCHEMA_VERSIONS[0],
-        '_actionConfigs': {}
-    }
+    migrated_advanced_features = {'_version': SCHEMA_VERSIONS[0], '_actionConfigs': {}}
 
     actionConfigs = migrated_advanced_features['_actionConfigs']
     for key, value in advanced_features.items():
@@ -39,6 +40,219 @@ def migrate_advanced_features(advanced_features: dict) -> dict | None:
     return migrated_advanced_features
 
 
+def migrate_submission_supplementals(supplemental_data: dict) -> dict | None:
+    """
+    Convert legacy transcript/translation supplemental data to the new schema.
+
+    Revisions matching the stored Google result ('googlets'/'googletx') go to
+    the automatic actions, all others to the manual ones.
+    Returns None when the data is already at the current schema version.
+    """
+    if supplemental_data.get('_version') == SCHEMA_VERSIONS[0]:
+        return
+    supplemental = {'_version': SCHEMA_VERSIONS[0]}
+    for question_xpath, action_results in supplemental_data.items():
+        results = {}
+        # get all the automatic result data
+        automatic_transcript = action_results.get('googlets', {})
+        automatic_transcript_language = automatic_transcript.get('languageCode')
+        automatic_transcript_value = automatic_transcript.get('value')
+        automatic_translation = action_results.get('googletx', {})
+        automatic_translation_language = automatic_translation.get('languageCode')
+        automatic_translation_value = automatic_translation.get('value')
+        automatic_translation_source_language = automatic_translation.get('source')
+
+        # divide transcripts into manual and automatic; a question may have no
+        # 'transcript' entry at all, so never hand None to the helper
+        manual_transcripts, automatic_transcripts = (
+            _separate_manual_and_automatic_versions(
+                action_results.get('transcript') or {},
+                automatic_transcript_language,
+                automatic_transcript_value,
+            )
+        )
+        if manual_transcripts:
+            results['manual_transcription'] = _version_list_to_summary_dict(
+                manual_transcripts
+            )
+        if automatic_transcripts:
+            results['automatic_google_transcription'] = _version_list_to_summary_dict(
+                automatic_transcripts
+            )
+        # the candidate sources are identical for every translation language
+        all_tagged_transcripts = _combine_source_transcripts(
+            manual_transcripts, automatic_transcripts
+        )
+
+        # process translations, divided into manual and automatic by language
+        translations_dict = action_results.get('translation', {})
+        automatic_translations = {}
+        manual_translations = {}
+        for language_code, translations in translations_dict.items():
+            manual_translations_for_language, automatic_translations_for_language = (
+                _separate_manual_and_automatic_versions(
+                    translations,
+                    automatic_translation_language,
+                    automatic_translation_value,
+                    language_code,
+                )
+            )
+            if automatic_translations_for_language:
+                _add_translation_sources(
+                    automatic_translations_for_language,
+                    all_tagged_transcripts,
+                    automatic_translation_source_language,
+                )
+                automatic_translations[language_code] = _version_list_to_summary_dict(
+                    automatic_translations_for_language
+                )
+            if manual_translations_for_language:
+                _add_translation_sources(
+                    manual_translations_for_language, all_tagged_transcripts
+                )
+                manual_translations[language_code] = _version_list_to_summary_dict(
+                    manual_translations_for_language
+                )
+        if automatic_translations:
+            results['automatic_google_translation'] = automatic_translations
+        if manual_translations:
+            results['manual_translation'] = manual_translations
+        supplemental[question_xpath] = results
+
+    return supplemental
+
+
 def set_version(schema: dict) -> dict:
     schema['_version'] = SCHEMA_VERSIONS[0]
     return schema
+
+
+def _add_translation_sources(
+    version_list, all_tagged_transcripts, automatic_translation_source_language=None
+):
+    """Tag each translation in `version_list` with its most likely source."""
+    if not all_tagged_transcripts:
+        return  # nothing to depend on; avoids indexing an empty transcript list
+    for translation in version_list:
+        source = _determine_source_transcript(
+            translation,
+            all_tagged_transcripts,
+            automatic_source_language=automatic_translation_source_language,
+        )
+        # record only the identifying keys of the chosen transcript
+        translation['_dependency'] = {k: source[k] for k in ('_uuid', '_actionId')}
+
+
+def _combine_source_transcripts(manual_transcripts, automatic_transcripts):
+    """
+    Return copies of all transcripts, newest first, each tagged with the
+    `_actionId` of the action it came from so the origin is not lost.
+    """
+    sources = (
+        ('manual_transcription', manual_transcripts),
+        ('automatic_google_transcription', automatic_transcripts),
+    )
+    tagged = [
+        {**transcript, '_actionId': action_id}
+        for action_id, transcripts in sources
+        for transcript in transcripts
+    ]
+    return sorted(tagged, key=lambda item: item['_dateCreated'], reverse=True)
+
+
+def _determine_source_transcript(
+    translation_revision, all_transcripts, automatic_source_language=None
+):
+    """
+    Pick the transcript that a translation revision was most likely made from.
+
+    `all_transcripts` must be non-empty; it is scanned in the order given, so
+    the first hit is the most recent one only if the list is sorted newest first.
+    """
+    if automatic_source_language:
+        # the source language is known: only transcripts in it are candidates
+        candidates = [
+            candidate
+            for candidate in all_transcripts
+            if candidate['language'] == automatic_source_language
+        ]
+    else:
+        candidates = all_transcripts
+    # prefer the first candidate created before the translation itself
+    for candidate in candidates:
+        if candidate['_dateCreated'] < translation_revision['_dateCreated']:
+            return candidate
+    # otherwise take the first candidate, or the first transcript overall when
+    # no transcript is in the known source language
+    return (candidates or all_transcripts)[0]
+
+
+def _new_revision_from_old(old_transcript_revision_dict: dict) -> dict | None:
+    """Convert an old-format revision to the new format; None for bad data."""
+    accepted_at = timezone.now().isoformat()
+    old = old_transcript_revision_dict
+    # ignore bad data: both the language and the value are required
+    if not ('languageCode' in old and 'value' in old):
+        return None
+    return {
+        # 'dateModified' may be absent, which leaves `_dateCreated` as None
+        '_dateCreated': old.get('dateModified'),
+        'language': old['languageCode'],
+        'value': old['value'],
+        '_uuid': str(uuid.uuid4()),
+        # all preexisting translations/transcripts are considered accepted
+        '_dateAccepted': accepted_at,
+    }
+
+
+def _separate_manual_and_automatic_versions(
+    old_action_dictionary,
+    automatic_result_language,
+    automatic_result_value,
+    # translations have an expected language
+    language=None,
+):
+    """
+    Split an old transcript/translation dict into new-format version lists.
+
+    Returns `(manual_versions, automatic_versions)`, both newest first. The
+    input may be None or empty and is never modified.
+    """
+    old_action_dictionary = old_action_dictionary or {}
+    automatic_versions = []
+    manual_versions = []
+    wanted = ('value', 'languageCode', 'dateModified')
+    latest_revision = {k: v for k, v in old_action_dictionary.items() if k in wanted}
+    # add the latest revision to the list of all revisions for easier processing
+    all_revisions = [latest_revision, *(old_action_dictionary.get('revisions') or [])]
+    for revision in all_revisions:
+        if language:
+            # force the expected language if given, without mutating the input
+            revision = {**revision, 'languageCode': language}
+        revision_formatted = _new_revision_from_old(revision)
+        if revision_formatted is None:
+            continue
+        # if the language and value match that of the automatic result,
+        # assume this one was generated automatically
+        if (
+            revision_formatted['language'] == automatic_result_language
+            and revision_formatted['value'] == automatic_result_value
+        ):
+            # automatic versions also need a status
+            revision_formatted['status'] = 'complete'
+            automatic_versions.append(revision_formatted)
+        else:
+            manual_versions.append(revision_formatted)
+
+    # the input may be unordered; a missing date (None) sorts as the oldest
+    for versions in (manual_versions, automatic_versions):
+        versions.sort(reverse=True, key=lambda d: d['_dateCreated'] or '')
+    return manual_versions, automatic_versions
+
+
+def _version_list_to_summary_dict(list_of_versions: list[dict]) -> dict:
+    """Summarise a non-empty version list, assumed to be sorted newest first."""
+    first_created = list_of_versions[-1]['_dateCreated']
+    last_created = list_of_versions[0]['_dateCreated']
+    summary = {'_dateCreated': first_created, '_dateModified': last_created}
+    return {**summary, '_versions': list_of_versions}