diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index fa5f5d73f3f..b4bc1befb29 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -137,7 +137,6 @@ ddtrace/profiling @DataDog/profiling-python ddtrace/internal/settings/profiling.py @DataDog/profiling-python ddtrace/internal/datadog/profiling @DataDog/profiling-python tests/profiling @DataDog/profiling-python -tests/profiling_v2 @DataDog/profiling-python .gitlab/tests/profiling.yml @DataDog/profiling-python # MLObs diff --git a/hatch.toml b/hatch.toml index 5c2aea3d94d..f16412b9040 100644 --- a/hatch.toml +++ b/hatch.toml @@ -158,7 +158,7 @@ DD_PROFILING_PYTORCH_ENABLED = "true" test = [ "uname -a", "pip freeze", - "python -m pytest tests/profiling_v2/test_pytorch.py -vvv --capture=tee-sys", + "python -m pytest tests/profiling/test_pytorch.py -vvv --capture=tee-sys", ] [[envs.profiling_pytorch.matrix]] diff --git a/pyproject.toml b/pyproject.toml index 0d96d70749f..88b150dd18a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -148,7 +148,6 @@ exclude = [ "ddtrace/vendor/*", "ddtrace/appsec/_iast/_taint_tracking/_vendor/*", "tests/profiling/collector/pprof_*pb2.py", - "tests/profiling/simple_program_gevent.py", "tests/contrib/grpc/hello_pb2.py", "tests/contrib/django_celery/app/*", "tests/contrib/protobuf/schemas/**/*.py", diff --git a/riotfile.py b/riotfile.py index da36295eecc..4cbbb33d267 100644 --- a/riotfile.py +++ b/riotfile.py @@ -3235,7 +3235,7 @@ def select_pys(min_version: str = MIN_PYTHON_VERSION, max_version: str = MAX_PYT Venv( name="profile-v2", # NB riot commands that use this Venv must include --pass-env to work properly - command="python -m tests.profiling.run pytest -v --no-cov --capture=no --benchmark-disable --ignore='tests/profiling_v2/collector/test_memalloc.py' {cmdargs} tests/profiling_v2", # noqa: E501 + command="python -m tests.profiling.run pytest -v --no-cov --capture=no --benchmark-disable --ignore='tests/profiling/collector/test_memalloc.py' {cmdargs} tests/profiling", # noqa: E501 env={ "DD_PROFILING_ENABLE_ASSERTS": "1", "CPUCOUNT": "12", @@ -3258,7 +3258,7 @@ def select_pys(min_version: str = MIN_PYTHON_VERSION, max_version: str = MAX_PYT venvs=[ Venv( name="profile-v2-uwsgi", - command="python -m tests.profiling.run pytest -v --no-cov --capture=no --benchmark-disable {cmdargs} tests/profiling_v2/test_uwsgi.py", # noqa: E501 + command="python -m tests.profiling.run pytest -v --no-cov --capture=no --benchmark-disable {cmdargs} tests/profiling/test_uwsgi.py", # noqa: E501 pys=select_pys(max_version="3.13"), pkgs={ "uwsgi": "<2.0.30", @@ -3376,7 +3376,7 @@ def select_pys(min_version: str = MIN_PYTHON_VERSION, max_version: str = MAX_PYT ), Venv( name="profile-v2-memalloc", - command="python -m tests.profiling.run pytest -v --no-cov --capture=no --benchmark-disable {cmdargs} tests/profiling_v2/collector/test_memalloc.py", # noqa: E501 + command="python -m tests.profiling.run pytest -v --no-cov --capture=no --benchmark-disable {cmdargs} tests/profiling/collector/test_memalloc.py", # noqa: E501 # skipping v3.14 for now due to an unstable `lz4 ` lib issue: https://gitlab.ddbuild.io/DataDog/apm-reliability/dd-trace-py/-/jobs/1163312347 pys=select_pys(max_version="3.13"), pkgs={ diff --git a/scripts/check_suitespec_coverage.py b/scripts/check_suitespec_coverage.py index 5594aeb4c51..c34029f0422 100755 --- a/scripts/check_suitespec_coverage.py +++ b/scripts/check_suitespec_coverage.py @@ -24,10 +24,6 @@ # Ignore any embedded documentation IGNORE_PATTERNS.add("**/*.md") -# TODO(taegyunkim): remove these 
after merging profiling v2 tests back to profiling -IGNORE_PATTERNS.add("tests/profiling/*.py") -IGNORE_PATTERNS.add("tests/profiling/*/*.py") -IGNORE_PATTERNS.add("tests/profiling/*/*.proto") def owners(path: str) -> str: diff --git a/tests/README.md b/tests/README.md index 8dcf61e07f3..3d002f0856e 100644 --- a/tests/README.md +++ b/tests/README.md @@ -63,7 +63,6 @@ suites: - '@core' - '@profiling' - tests/profiling/* - - tests/profiling_v2/* services: - redis ``` diff --git a/tests/profiling/collector/conftest.py b/tests/profiling/collector/conftest.py index 8475c4d0a22..7dc1d816091 100644 --- a/tests/profiling/collector/conftest.py +++ b/tests/profiling/collector/conftest.py @@ -1,22 +1,8 @@ import pytest import ddtrace -from ddtrace.profiling import Profiler -from tests.utils import override_global_config @pytest.fixture def tracer(): - with override_global_config(dict(_startup_logs_enabled=False)): - yield ddtrace.trace.tracer - - -@pytest.fixture -def profiler(monkeypatch): - monkeypatch.setenv("DD_PROFILING_API_TIMEOUT_MS", "100") - p = Profiler() - p.start() - try: - yield p - finally: - p.stop() + return ddtrace.trace.tracer diff --git a/tests/profiling_v2/collector/test_async_generator.py b/tests/profiling/collector/test_async_generator.py similarity index 100% rename from tests/profiling_v2/collector/test_async_generator.py rename to tests/profiling/collector/test_async_generator.py diff --git a/tests/profiling_v2/collector/test_asyncio.py b/tests/profiling/collector/test_asyncio.py similarity index 100% rename from tests/profiling_v2/collector/test_asyncio.py rename to tests/profiling/collector/test_asyncio.py diff --git a/tests/profiling/collector/test_memalloc.py b/tests/profiling/collector/test_memalloc.py index 28043f02d92..5bca67c00d7 100644 --- a/tests/profiling/collector/test_memalloc.py +++ b/tests/profiling/collector/test_memalloc.py @@ -1,184 +1,809 @@ -# -*- encoding: utf-8 -*- import gc +import inspect import os import sys import threading import pytest -from ddtrace.internal.settings.profiling import ProfilingConfig -from ddtrace.internal.settings.profiling import _derive_default_heap_sample_size +from ddtrace.internal.datadog.profiling import ddup from ddtrace.profiling.collector import memalloc from ddtrace.profiling.event import DDFrame +from tests.profiling.collector import pprof_utils -try: - from ddtrace.profiling.collector import _memalloc -except ImportError: - pytestmark = pytest.mark.skip("_memalloc not available") +PY_313_OR_ABOVE = sys.version_info[:2] >= (3, 13) -def test_start_twice(): - _memalloc.start(64, 512) - with pytest.raises(RuntimeError): - _memalloc.start(64, 512) - _memalloc.stop() +def _allocate_1k(): + return [object() for _ in range(1000)] -def test_start_wrong_arg(): - with pytest.raises(TypeError, match="function takes exactly 2 arguments \\(1 given\\)"): - _memalloc.start(2) +_ALLOC_LINE_NUMBER = _allocate_1k.__code__.co_firstlineno + 1 - with pytest.raises(ValueError, match="the number of frames must be in range \\[1; 65535\\]"): - _memalloc.start(429496, 1) - with pytest.raises(ValueError, match="the number of frames must be in range \\[1; 65535\\]"): - _memalloc.start(-1, 1) +# This test is marked as subprocess as it changes default heap sample size +@pytest.mark.subprocess( + env=dict(DD_PROFILING_HEAP_SAMPLE_SIZE="1024", DD_PROFILING_OUTPUT_PPROF="/tmp/test_heap_samples_collected") +) +def test_heap_samples_collected(): + import os + + from ddtrace.profiling import Profiler + from tests.profiling.collector import pprof_utils + 
from tests.profiling.collector.test_memalloc import _allocate_1k + + # Test for https://github.com/DataDog/dd-trace-py/issues/11069 + pprof_prefix = os.environ["DD_PROFILING_OUTPUT_PPROF"] + output_filename = pprof_prefix + "." + str(os.getpid()) + + p = Profiler() + p.start() + x = _allocate_1k() # noqa: F841 + p.stop() + + profile = pprof_utils.parse_newest_profile(output_filename) + samples = pprof_utils.get_samples_with_value_type(profile, "heap-space") + assert len(samples) > 0 + + +def test_memory_collector(tmp_path): + test_name = "test_memory_collector" + pprof_prefix = str(tmp_path / test_name) + output_filename = pprof_prefix + "." + str(os.getpid()) + + ddup.config( + service=test_name, + version="test", + env="test", + output_filename=pprof_prefix, + ) + ddup.start() + + mc = memalloc.MemoryCollector(heap_sample_size=256) + with mc: + _allocate_1k() + mc.snapshot() + + ddup.upload() + + profile = pprof_utils.parse_newest_profile(output_filename) + # Gets samples with alloc-space > 0 + samples = pprof_utils.get_samples_with_value_type(profile, "alloc-space") + + assert len(samples) > 0 + + alloc_samples_idx = pprof_utils.get_sample_type_index(profile, "alloc-samples") + for sample in samples: + # We also want to check 'alloc-samples' is > 0. + assert sample.value[alloc_samples_idx] > 0 + + # We also want to assert that there's a sample that's coming from _allocate_1k() + # And also assert that it's actually coming from _allocate_1k() + pprof_utils.assert_profile_has_sample( + profile, + samples, + expected_sample=pprof_utils.StackEvent( + thread_name="MainThread", + thread_id=threading.main_thread().ident, + locations=[ + pprof_utils.StackLocation( + function_name="_allocate_1k", filename="test_memalloc.py", line_no=_ALLOC_LINE_NUMBER + ) + ], + ), + ) - with pytest.raises( - ValueError, - match="the heap sample size must be in range \\[0; 4294967295\\]", - ): - _memalloc.start(64, -1) - with pytest.raises( - ValueError, - match="the heap sample size must be in range \\[0; 4294967295\\]", - ): - _memalloc.start(64, 345678909876) +def test_memory_collector_ignore_profiler(tmp_path): + test_name = "test_memory_collector_ignore_profiler" + pprof_prefix = str(tmp_path / test_name) + output_filename = pprof_prefix + "." + str(os.getpid()) + ddup.config( + service=test_name, + version="test", + env="test", + output_filename=pprof_prefix, + ) + ddup.start() -def test_start_stop(): - _memalloc.start(1, 1) - _memalloc.stop() + mc = memalloc.MemoryCollector(ignore_profiler=True) + quit_thread = threading.Event() + with mc: -def _allocate_1k(): - return [object() for _ in range(1000)] + def alloc(): + _allocate_1k() + quit_thread.wait() + alloc_thread = threading.Thread(name="allocator", target=alloc) + alloc_thread._ddtrace_profiling_ignore = True + alloc_thread.start() -_ALLOC_LINE_NUMBER = _allocate_1k.__code__.co_firstlineno + 1 + mc.snapshot() + # We need to wait for the data collection to happen so it gets the `_ddtrace_profiling_ignore` Thread attribute from + # the global thread list. 
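+        # (Illustrative sketch of the filtering, not the actual implementation:
+        # the collector can resolve ignored threads roughly as
+        #
+        #     ignored = {t.ident for t in threading.enumerate()
+        #                if getattr(t, "_ddtrace_profiling_ignore", False)}
+        #
+        # so samples attributed to the "allocator" thread are dropped, which is
+        # why parsing the profile below is expected to find no samples.)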
+ quit_thread.set() + alloc_thread.join() -def _pre_allocate_1k(): - return _allocate_1k() + ddup.upload() + try: + pprof_utils.parse_newest_profile(output_filename) + except AssertionError as e: + assert "No samples found" in str(e) -def test_iter_events(): - max_nframe = 32 - collector = memalloc.MemoryCollector(max_nframe=max_nframe, heap_sample_size=64) - with collector: - _allocate_1k() - samples = collector.test_snapshot() - alloc_samples = [s for s in samples if s.alloc_size > 0] - total_alloc_count = sum(s.count for s in alloc_samples) +@pytest.mark.subprocess( + env=dict(DD_PROFILING_HEAP_SAMPLE_SIZE="8", DD_PROFILING_OUTPUT_PPROF="/tmp/test_heap_profiler_large_heap_overhead") +) +def test_heap_profiler_large_heap_overhead(): + # TODO(nick): this test case used to crash due to integer arithmetic bugs. + # Now it doesn't crash, but it takes far too long to run to be useful in CI. + # Un-skip this test if/when we improve the worst-case performance of the + # heap profiler for large heaps + from ddtrace.profiling import Profiler + from tests.profiling.collector.test_memalloc import one - assert total_alloc_count >= 1000 - # Watchout: if we dropped samples the test will likely fail + p = Profiler() + p.start() - object_count = 0 - for sample in alloc_samples: - stack = sample.frames - thread_id = sample.thread_id - size = sample.alloc_size + count = 100_000 + thing_size = 32 - assert 0 < len(stack) <= max_nframe - assert size >= 1 # size depends on the object size - last_call = stack[0] - if last_call == DDFrame( - __file__, - _ALLOC_LINE_NUMBER, - "" if sys.version_info < (3, 12) else "_allocate_1k", - "", - ): - assert thread_id == threading.main_thread().ident - if sys.version_info < (3, 12) and len(stack) > 1: - assert stack[1] == DDFrame(__file__, _ALLOC_LINE_NUMBER, "_allocate_1k", "") - object_count += sample.count + junk = [] + for i in range(count): + b1 = one(thing_size) + b2 = one(2 * thing_size) + b3 = one(3 * thing_size) + b4 = one(4 * thing_size) + t = (b1, b2, b3, b4) + junk.append(t) - assert object_count >= 1000 + del junk + p.stop() -def test_iter_events_dropped(): - max_nframe = 32 - collector = memalloc.MemoryCollector(max_nframe=max_nframe, heap_sample_size=64) - with collector: - _allocate_1k() - samples = collector.test_snapshot() - alloc_samples = [s for s in samples if s.alloc_size > 0] - total_alloc_count = sum(s.count for s in alloc_samples) +# one, two, three, and four exist to give us distinct things +# we can find in the profile without depending on something +# like the line number at which an allocation happens +# Python 3.13 changed bytearray to use an allocation domain that we don't +# currently profile, so we use None instead of bytearray to test. 
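+# For example (an illustration of the helpers below, not something the tests
+# assert): on 3.13+ one(4) returns (None, None, None, None), a tuple allocated
+# through the object domain that memalloc hooks, while on older versions it
+# returns bytearray(4). Only the distinct function names matter for the
+# assertions, since they are what we look for in sampled frames.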
+def one(size): + return (None,) * size if PY_313_OR_ABOVE else bytearray(size) - assert len(alloc_samples) > 0 - assert total_alloc_count >= 1000 +def two(size): + return (None,) * size if PY_313_OR_ABOVE else bytearray(size) -def test_iter_events_not_started(): - collector = memalloc.MemoryCollector() - samples = collector.test_snapshot() - assert samples == () +def three(size): + return (None,) * size if PY_313_OR_ABOVE else bytearray(size) -@pytest.mark.skipif( - os.getenv("DD_PROFILE_TEST_GEVENT", False), - reason="Test not compatible with gevent", + +def four(size): + return (None,) * size if PY_313_OR_ABOVE else bytearray(size) + + +def _create_allocation(size): + return (None,) * size if PY_313_OR_ABOVE else bytearray(size) + + +class HeapInfo: + def __init__(self, count, size): + self.count = count + self.size = size + + +def get_heap_info(heap, funcs): + got = {} + for event in heap: + (frames, _), in_use_size, alloc_size, count = event + + in_use = in_use_size > 0 + size = in_use_size if in_use_size > 0 else alloc_size + + if not in_use: + continue + func = frames[0].function_name + if func in funcs: + v = got.get(func, HeapInfo(0, 0)) + v.count += 1 + v.size += size + got[func] = v + return got + + +def has_function_in_traceback(frames, function_name): + return any(frame.function_name == function_name for frame in frames) + + +def get_tracemalloc_stats_per_func(stats, funcs): + source_to_func = {} + + for f in funcs: + file = inspect.getsourcefile(f) + line = inspect.getsourcelines(f)[1] + 1 + source_to_func[str(file) + str(line)] = f.__name__ + + actual_sizes = {} + actual_counts = {} + for stat in stats: + f = stat.traceback[0] + key = f.filename + str(f.lineno) + if key in source_to_func: + func_name = source_to_func[key] + actual_sizes[func_name] = stat.size + actual_counts[func_name] = stat.count + return actual_sizes, actual_counts + + +# TODO: higher sampling intervals have a lot more variance and are flaky +# but would be nice to test since our default is 1MiB +@pytest.mark.parametrize("sample_interval", (8, 512, 1024)) +def test_heap_profiler_sampling_accuracy(sample_interval): + # tracemalloc lets us get ground truth on how many allocations there were + import tracemalloc + + # TODO(nick): use Profiler instead of _memalloc + from ddtrace.profiling.collector import _memalloc + + # We seed the RNG to reduce flakiness. This doesn't actually diminish the + # quality of the test much. A broken sampling implementation is unlikely to + # pass for an arbitrary seed. + old = os.environ.get("_DD_MEMALLOC_DEBUG_RNG_SEED") + os.environ["_DD_MEMALLOC_DEBUG_RNG_SEED"] = "42" + _memalloc.start(32, sample_interval) + # Put the env var back in the state we found it + if old is not None: + os.environ["_DD_MEMALLOC_DEBUG_RNG_SEED"] = old + else: + del os.environ["_DD_MEMALLOC_DEBUG_RNG_SEED"] + + tracemalloc.start() + + junk = [] + for i in range(1000): + size = 256 + junk.append(one(size)) + junk.append(two(2 * size)) + junk.append(three(3 * size)) + junk.append(four(4 * size)) + + # TODO(nick): randomly remove things from junk to see if the profile is + # still accurate + + # Stop tracemalloc before collecting the heap sample, since tracemalloc + # is _really_ slow when the _memalloc.heap() call does lots of allocs for + # lower sample intervals (i.e. more sampled allocations) + stats = tracemalloc.take_snapshot().statistics("traceback") + tracemalloc.stop() + + heap = _memalloc.heap() + # Important: stop _memalloc _after_ tracemalloc. Need to remove allocator + # hooks in LIFO order. 
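+    # Hedged sketch of the assumed hook layering (not taken from the C code):
+    #
+    #     malloc -> tracemalloc hook -> _memalloc hook -> original allocator
+    #
+    # tracemalloc was installed second, so it is removed first; removing
+    # _memalloc first would leave tracemalloc delegating to a hook that no
+    # longer exists.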
+    _memalloc.stop()
+
+    actual_sizes, _ = get_tracemalloc_stats_per_func(stats, (one, two, three, four))
+    actual_total = sum(actual_sizes.values())
+
+    del junk
+
+    sizes = get_heap_info(heap, {"one", "two", "three", "four"})
+
+    total = sum(v.size for v in sizes.values())
+    print(f"observed total: {total} actual total: {actual_total} error: {abs(total - actual_total) / actual_total}")
+    # 20% error in actual size feels pretty generous
+    # TODO(nick): justify in terms of variance of sampling?
+    assert abs(1 - total / actual_total) <= 0.20
+
+    print("func\tcount\tsize\tactual\trel\tactual\tdiff")
+    for func in ("one", "two", "three", "four"):
+        got = sizes[func]
+        actual_size = actual_sizes[func]
+
+        # Relative portion of the bytes in the profile for this function
+        # out of the functions we're interested in
+        rel = got.size / total
+        actual_rel = actual_size / actual_total
+
+        print(
+            f"{func}\t{got.count}\t{got.size}\t{actual_size}\t{rel:.3f}\t{actual_rel:.3f}\t{abs(rel - actual_rel):.3f}"
+        )
+
+        # Assert that the reported portion of this function in the profile is
+        # pretty close to the actual portion. So, if it's actually ~20% of the
+        # profile then we'd accept anything between 10% and 30%, which is
+        # probably too generous for low sampling intervals but at least won't be
+        # flaky.
+        assert abs(rel - actual_rel) < 0.10
+
+
+@pytest.mark.skip(reason="too slow, indeterministic")
+@pytest.mark.subprocess(
+    env=dict(
+        # Turn off other profilers so that we're just testing memalloc
+        DD_PROFILING_STACK_ENABLED="false",
+        DD_PROFILING_LOCK_ENABLED="false",
+        # Upload a lot, since rotating out memalloc profiler state can race with profiling
+        DD_PROFILING_UPLOAD_INTERVAL="1",
+    ),
 )
-def test_iter_events_multi_thread():
-    max_nframe = 32
-    t = threading.Thread(target=_allocate_1k)
-    collector = memalloc.MemoryCollector(max_nframe=max_nframe, heap_sample_size=64)
-    with collector:
-        _allocate_1k()
+def test_memealloc_data_race_regression():
+    import gc
+    import threading
+    import time
+
+    from ddtrace.profiling import Profiler
+
+    gc.enable()
+    # This threshold controls when garbage collection is triggered. The
+    # threshold is on the count of live allocations, which is checked when doing
+    # a new allocation. This test is ultimately trying to get the allocation of
+    # frame objects during the memory profiler's traceback function to trigger
+    # garbage collection. We want a lower threshold to improve the odds that
+    # this happens.
+    gc.set_threshold(100)
+
+    class Thing:
+        def __init__(self):
+            # Self reference so this gets deallocated in GC vs via refcount
+            self.ref = self
+
+        def __del__(self):
+            # Force GIL yield, so if/when memalloc triggers GC, this is
+            # deallocated, releasing GIL while memalloc is sampling and allowing
+            # something else to run and possibly modify memalloc's internal
+            # state concurrently
+            time.sleep(0)
+
+    def do_alloc():
+        def f():
+            return Thing()
+
+        return f
+
+    def lotsa_allocs(ev):
+        while not ev.is_set():
+            f = do_alloc()
+            f()
+            time.sleep(0.01)
+
+    p = Profiler()
+    p.start()
+
+    threads = []
+    ev = threading.Event()
+    for i in range(4):
+        t = threading.Thread(target=lotsa_allocs, args=(ev,))
         t.start()
+        threads.append(t)
+
+    # Arbitrary sleep. This typically crashes in about a minute.
+ # But for local development, either let it run way longer or + # figure out sanitizer instrumentation + time.sleep(120) + + p.stop() + + ev.set() + for t in threads: t.join() - samples = collector.test_snapshot() - alloc_samples = [s for s in samples if s.alloc_size > 0] - total_alloc_count = sum(s.count for s in alloc_samples) +@pytest.mark.parametrize("sample_interval", (256, 512, 1024)) +def test_memory_collector_allocation_accuracy_with_tracemalloc(sample_interval): + import tracemalloc - assert total_alloc_count >= 1000 + old = os.environ.get("_DD_MEMALLOC_DEBUG_RNG_SEED") + os.environ["_DD_MEMALLOC_DEBUG_RNG_SEED"] = "42" - # Watchout: if we dropped samples the test will likely fail + mc = memalloc.MemoryCollector(heap_sample_size=sample_interval) - count_object = 0 - count_thread = 0 - for sample in alloc_samples: - stack = sample.frames - thread_id = sample.thread_id - size = sample.alloc_size + try: + with mc: + tracemalloc.start() - assert 0 < len(stack) <= max_nframe - assert size >= 1 # size depends on the object size - last_call = stack[0] - if last_call == DDFrame( - __file__, - _ALLOC_LINE_NUMBER, - "" if sys.version_info < (3, 12) else "_allocate_1k", - "", - ): - if thread_id == threading.main_thread().ident: - count_object += sample.count - if sys.version_info < (3, 12) and len(stack) > 1: - assert stack[1] == DDFrame(__file__, _ALLOC_LINE_NUMBER, "_allocate_1k", "") - elif thread_id == t.ident: - count_thread += sample.count - entry = 2 if sys.version_info < (3, 12) else 1 - if entry < len(stack): - assert stack[entry].file_name == threading.__file__ - assert stack[entry].lineno > 0 - assert stack[entry].function_name == "run" + junk = [] + for i in range(1000): + size = 256 + junk.append(one(size)) + junk.append(two(2 * size)) + junk.append(three(3 * size)) + junk.append(four(4 * size)) - assert count_object >= 1000 - assert count_thread >= 1000 + stats = tracemalloc.take_snapshot().statistics("traceback") + tracemalloc.stop() + del junk -def test_heap(): - max_nframe = 32 - collector = memalloc.MemoryCollector(max_nframe=max_nframe, heap_sample_size=1024) - with collector: - _test_heap_impl(collector, max_nframe) + samples = mc.test_snapshot() + + finally: + if old is not None: + os.environ["_DD_MEMALLOC_DEBUG_RNG_SEED"] = old + else: + if "_DD_MEMALLOC_DEBUG_RNG_SEED" in os.environ: + del os.environ["_DD_MEMALLOC_DEBUG_RNG_SEED"] + + allocation_samples = [s for s in samples if s.in_use_size == 0] + heap_samples = [s for s in samples if s.in_use_size > 0] + + print(f"Total samples: {len(samples)}") + print(f"Allocation samples (in_use_size=0): {len(allocation_samples)}") + print(f"Heap samples (in_use_size>0): {len(heap_samples)}") + + assert len(allocation_samples) > 0, "Should have captured allocation samples after deletion" + + total_allocation_count = 0 + for sample in allocation_samples: + assert sample.size > 0, f"Invalid allocation sample size: {sample.size}" + assert sample.count > 0, f"Invalid allocation sample count: {sample.count}" + assert sample.in_use_size == 0, f"Allocation sample should have in_use_size=0, got: {sample.in_use_size}" + assert sample.in_use_size >= 0, f"Invalid in_use_size: {sample.in_use_size}" + assert sample.alloc_size >= 0, f"Invalid alloc_size: {sample.alloc_size}" + total_allocation_count += sample.count + + print(f"Total allocation count: {total_allocation_count}") + assert total_allocation_count >= 1, "Should have captured at least 1 allocation sample" + + actual_sizes, actual_counts = get_tracemalloc_stats_per_func(stats, (one, 
two, three, four)) + actual_total = sum(actual_sizes.values()) + actual_count_total = sum(actual_counts.values()) + + def get_allocation_info(samples, funcs): + got = {} + for sample in samples: + if sample.in_use_size > 0: + continue + + for frame in sample.frames: + func = frame.function_name + if func in funcs: + v = got.get(func, HeapInfo(0, 0)) + v.count += sample.count + v.size += sample.alloc_size + got[func] = v + break + return got + + sizes = get_allocation_info(samples, {"one", "two", "three", "four"}) + + total = sum(v.size for v in sizes.values()) + total_count = sum(v.count for v in sizes.values()) + + print(f"observed total: {total} actual total: {actual_total} error: {abs(total - actual_total) / actual_total}") + assert abs(1 - total / actual_total) <= 0.20 + + count_error = abs(total_count - actual_count_total) / actual_count_total + print(f"observed count total: {total_count} actual count total: {actual_count_total} error: {count_error}") + # Commenting out the total count assertions because we still have more work to do on this. + # Our reported counts differed from the actual count by more than we expected, while the reported sizes + # are accurate. Our counts seem to be consistently lower than expected for the sample intervals we're testing. + # We'll need to double-check our count scaling before making assertions about the actual values + # assert abs(1 - total_count / actual_count_total) <= 0.30 + + print("func\tcount\tsize\tactual_size\tactual_count\trel_size\tactual_rel_size\trel_count\tactual_rel_count") + for func in ("one", "two", "three", "four"): + got = sizes[func] + actual_size = actual_sizes[func] + actual_count = actual_counts[func] + + rel_size = got.size / total + actual_rel_size = actual_size / actual_total + + rel_count = got.count / total_count + actual_rel_count = actual_count / actual_count_total + + print( + f"{func}\t{got.count}\t{got.size}\t{actual_size}\t{actual_count}\t{rel_size:.3f}\t{actual_rel_size:.3f}\t{rel_count:.3f}\t{actual_rel_count:.3f}" + ) + + assert abs(rel_size - actual_rel_size) < 0.10 + assert abs(rel_count - actual_rel_count) < 0.15 + + print(f"Successfully validated allocation sampling accuracy for sample_interval={sample_interval}") + print(f"Captured {len(allocation_samples)} allocation samples representing {total_allocation_count} allocations") + + +def test_memory_collector_allocation_tracking_across_snapshots(): + mc = memalloc.MemoryCollector(heap_sample_size=64) + + with mc: + data_to_free = [] + for i in range(10): + data_to_free.append(one(256)) + + data_to_keep = [] + for i in range(10): + data_to_keep.append(two(512)) + + del data_to_free + + samples = mc.test_snapshot() + + assert all(sample.alloc_size > 0 for sample in samples), ( + "Initial snapshot should have alloc_size>0 (new allocations)" + ) + + freed_samples = [s for s in samples if s.in_use_size == 0] + live_samples = [s for s in samples if s.in_use_size > 0] + + assert len(freed_samples) > 0, "Should have some freed samples after deletion" + + assert len(live_samples) > 0, "Should have some live samples" + + for sample in samples: + assert sample.size > 0, f"Invalid size: {sample.size}" + assert sample.count > 0, f"Invalid count: {sample.count}" + assert sample.in_use_size >= 0, f"Invalid in_use_size: {sample.in_use_size}" + assert sample.alloc_size >= 0, f"Invalid alloc_size: {sample.alloc_size}" + + one_freed_samples = [sample for sample in samples if has_function_in_traceback(sample.frames, "one")] + + assert len(one_freed_samples) > 0, "Should have 
freed samples from function 'one'" + assert all(sample.in_use_size == 0 and sample.alloc_size > 0 for sample in one_freed_samples) + + two_live_samples = [sample for sample in samples if has_function_in_traceback(sample.frames, "two")] + + assert len(two_live_samples) > 0, "Should have live samples from function 'two'" + assert all(sample.in_use_size > 0 and sample.alloc_size > 0 for sample in two_live_samples) + + del data_to_keep + + +def test_memory_collector_python_interface_with_allocation_tracking(): + mc = memalloc.MemoryCollector(heap_sample_size=128) + + with mc: + first_batch = [] + for i in range(20): + first_batch.append(one(256)) + + # We're taking a snapshot here to ensure that in the next snapshot, we don't see any "one" allocations + mc.test_snapshot() + + second_batch = [] + for i in range(15): + second_batch.append(two(512)) + + del first_batch + + final_samples = mc.test_snapshot() + + assert len(final_samples) >= 0, "Final snapshot should be valid" + + for sample in final_samples: + assert sample.size > 0, f"Size should be positive int, got {sample.size}" + assert sample.count > 0, f"Count should be positive int, got {sample.count}" + assert sample.in_use_size >= 0, f"in_use_size should be non-negative int, got {sample.in_use_size}" + assert sample.alloc_size >= 0, f"alloc_size should be non-negative int, got {sample.alloc_size}" + + one_samples_in_final = [sample for sample in final_samples if has_function_in_traceback(sample.frames, "one")] + + assert len(one_samples_in_final) == 0, ( + f"Should have no samples with 'one' in traceback in final_samples, got {len(one_samples_in_final)}" + ) + + batch_two_live_samples = [ + sample + for sample in final_samples + if has_function_in_traceback(sample.frames, "two") and sample.in_use_size > 0 + ] + + assert len(batch_two_live_samples) > 0, ( + f"Should have live samples from batch two, got {len(batch_two_live_samples)}" + ) + assert all(sample.in_use_size > 0 and sample.alloc_size > 0 for sample in batch_two_live_samples) + + del second_batch + + +def test_memory_collector_python_interface_with_allocation_tracking_no_deletion(): + mc = memalloc.MemoryCollector(heap_sample_size=128) + + with mc: + initial_samples = mc.test_snapshot() + initial_count = len(initial_samples) + + first_batch = [] + for i in range(20): + first_batch.append(one(256)) + + after_first_batch = mc.test_snapshot() + + second_batch = [] + for i in range(15): + second_batch.append(two(512)) + + final_samples = mc.test_snapshot() + + assert len(after_first_batch) >= initial_count, "Should have at least as many samples after first batch" + assert len(final_samples) >= 0, "Final snapshot should be valid" + + for samples in [initial_samples, after_first_batch, final_samples]: + for sample in samples: + assert sample.size > 0, f"Size should be positive int, got {sample.size}" + assert sample.count > 0, f"Count should be positive int, got {sample.count}" + assert sample.in_use_size >= 0, f"in_use_size should be non-negative int, got {sample.in_use_size}" + assert sample.alloc_size >= 0, f"alloc_size should be non-negative int, got {sample.alloc_size}" + + batch_one_live_samples = [ + sample + for sample in final_samples + if has_function_in_traceback(sample.frames, "one") and sample.in_use_size > 0 + ] + + batch_two_live_samples = [ + sample + for sample in final_samples + if has_function_in_traceback(sample.frames, "two") and sample.in_use_size > 0 + ] + + assert len(batch_one_live_samples) > 0, ( + f"Should have live samples from batch one, got 
{len(batch_one_live_samples)}" + ) + assert len(batch_two_live_samples) > 0, ( + f"Should have live samples from batch two, got {len(batch_two_live_samples)}" + ) + + assert all(sample.in_use_size > 0 and sample.alloc_size == 0 for sample in batch_one_live_samples) + assert all(sample.in_use_size > 0 and sample.alloc_size > 0 for sample in batch_two_live_samples) + + del first_batch + del second_batch + + +def test_memory_collector_exception_handling(): + mc = memalloc.MemoryCollector(heap_sample_size=256) + + with pytest.raises(ValueError): + with mc: + _allocate_1k() + samples = mc.test_snapshot() + assert isinstance(samples, tuple) + raise ValueError("Test exception") + + with mc: + _allocate_1k() + samples = mc.test_snapshot() + assert isinstance(samples, tuple) + + +def test_memory_collector_allocation_during_shutdown(): + """Test that verifies that when _memalloc.stop() is called while allocations are still + happening in another thread, the shutdown process completes without deadlocks or crashes. + """ + import time + + from ddtrace.profiling.collector import _memalloc + + _memalloc.start(32, 512) + + shutdown_event = threading.Event() + allocation_thread = None + + def allocate_continuously(): + while not shutdown_event.is_set(): + data = [0] * 100 + del data + time.sleep(0.001) + + try: + allocation_thread = threading.Thread(target=allocate_continuously) + allocation_thread.start() + + time.sleep(0.1) + + _memalloc.stop() + + finally: + shutdown_event.set() + if allocation_thread: + allocation_thread.join(timeout=1) + + +def test_memory_collector_buffer_pool_exhaustion(): + """Test that the memory collector handles buffer pool exhaustion. + This test creates multiple threads that simultaneously allocate with very deep + stack traces, which could potentially exhaust internal buffer pools. + """ + mc = memalloc.MemoryCollector(heap_sample_size=64) + + with mc: + threads = [] + barrier = threading.Barrier(10) + + def allocate_with_traceback(): + barrier.wait() + + def deep_alloc(depth): + if depth == 0: + return _create_allocation(100) + return deep_alloc(depth - 1) + + data = deep_alloc(50) + del data + + for i in range(10): + t = threading.Thread(target=allocate_with_traceback) + threads.append(t) + t.start() + + for t in threads: + t.join() + + samples = mc.test_snapshot() + + deep_alloc_count = 0 + max_stack_depth = 0 + + for sample in samples: + assert sample.frames is not None, "Buffer pool test: All samples should have stack frames" + stack_depth = len(sample.frames) + max_stack_depth = max(max_stack_depth, stack_depth) + + for frame in sample.frames: + if frame.function_name == "deep_alloc": + deep_alloc_count += 1 + break + + assert deep_alloc_count >= 10, ( + f"Buffer pool test: Expected many allocations from concurrent threads, got {deep_alloc_count}" + ) + + assert max_stack_depth >= 50, ( + f"Buffer pool test: Stack traces should be preserved even under stress (expecting at least 50 frames), " + f"but max depth was only {max_stack_depth}" + ) + + +def test_memory_collector_thread_lifecycle(): + """Test that continuously creates and destroys threads while they perform allocations, + verifying that the collector can track allocations across changing thread contexts. 
+ """ + mc = memalloc.MemoryCollector(heap_sample_size=8) + + with mc: + threads = [] + + def worker(): + for i in range(10): + data = [i] * 100 + del data + + for i in range(20): + t = threading.Thread(target=worker) + t.start() + threads.append(t) + + if i > 5: + old_thread = threads.pop(0) + old_thread.join() + + for t in threads: + t.join() + + samples = mc.test_snapshot() + + worker_samples = 0 + for sample in samples: + for frame in sample.frames: + if frame.function_name == "worker": + worker_samples += 1 + break + + assert worker_samples > 0, ( + "Thread lifecycle test: Should capture allocations even as threads are created/destroyed" + ) + + +# Tests from tests/profiling/collector/test_memalloc.py (v1) +def _pre_allocate_1k(): + return _allocate_1k() def _test_heap_impl(collector, max_nframe): @@ -293,7 +918,171 @@ def _test_heap_impl(collector, max_nframe): pytest.fail("Allocated memory still in heap") +def test_start_twice(): + from ddtrace.profiling.collector import _memalloc + + _memalloc.start(64, 512) + with pytest.raises(RuntimeError): + _memalloc.start(64, 512) + _memalloc.stop() + + +def test_start_wrong_arg(): + from ddtrace.profiling.collector import _memalloc + + with pytest.raises(TypeError, match="function takes exactly 2 arguments \\(1 given\\)"): + _memalloc.start(2) + + with pytest.raises(ValueError, match="the number of frames must be in range \\[1; 65535\\]"): + _memalloc.start(429496, 1) + + with pytest.raises(ValueError, match="the number of frames must be in range \\[1; 65535\\]"): + _memalloc.start(-1, 1) + + with pytest.raises( + ValueError, + match="the heap sample size must be in range \\[0; 4294967295\\]", + ): + _memalloc.start(64, -1) + + with pytest.raises( + ValueError, + match="the heap sample size must be in range \\[0; 4294967295\\]", + ): + _memalloc.start(64, 345678909876) + + +def test_start_stop(): + from ddtrace.profiling.collector import _memalloc + + _memalloc.start(1, 1) + _memalloc.stop() + + +def test_iter_events(): + from ddtrace.profiling.event import DDFrame + + max_nframe = 32 + collector = memalloc.MemoryCollector(max_nframe=max_nframe, heap_sample_size=64) + with collector: + _allocate_1k() + samples = collector.test_snapshot() + alloc_samples = [s for s in samples if s.alloc_size > 0] + + total_alloc_count = sum(s.count for s in alloc_samples) + + assert total_alloc_count >= 1000 + # Watchout: if we dropped samples the test will likely fail + + object_count = 0 + for sample in alloc_samples: + stack = sample.frames + thread_id = sample.thread_id + size = sample.alloc_size + + assert 0 < len(stack) <= max_nframe + assert size >= 1 # size depends on the object size + last_call = stack[0] + if last_call == DDFrame( + __file__, + _ALLOC_LINE_NUMBER, + "" if sys.version_info < (3, 12) else "_allocate_1k", + "", + ): + assert thread_id == threading.main_thread().ident + if sys.version_info < (3, 12) and len(stack) > 1: + assert stack[1] == DDFrame(__file__, _ALLOC_LINE_NUMBER, "_allocate_1k", "") + object_count += sample.count + + assert object_count >= 1000 + + +def test_iter_events_dropped(): + max_nframe = 32 + collector = memalloc.MemoryCollector(max_nframe=max_nframe, heap_sample_size=64) + with collector: + _allocate_1k() + samples = collector.test_snapshot() + alloc_samples = [s for s in samples if s.alloc_size > 0] + + total_alloc_count = sum(s.count for s in alloc_samples) + + assert len(alloc_samples) > 0 + assert total_alloc_count >= 1000 + + +def test_iter_events_not_started(): + collector = memalloc.MemoryCollector() + 
samples = collector.test_snapshot() + assert samples == () + + +@pytest.mark.skipif( + os.getenv("DD_PROFILE_TEST_GEVENT", False), + reason="Test not compatible with gevent", +) +def test_iter_events_multi_thread(): + from ddtrace.profiling.event import DDFrame + + max_nframe = 32 + t = threading.Thread(target=_allocate_1k) + collector = memalloc.MemoryCollector(max_nframe=max_nframe, heap_sample_size=64) + with collector: + _allocate_1k() + t.start() + t.join() + + samples = collector.test_snapshot() + alloc_samples = [s for s in samples if s.alloc_size > 0] + + total_alloc_count = sum(s.count for s in alloc_samples) + + assert total_alloc_count >= 1000 + + # Watchout: if we dropped samples the test will likely fail + + count_object = 0 + count_thread = 0 + for sample in alloc_samples: + stack = sample.frames + thread_id = sample.thread_id + size = sample.alloc_size + + assert 0 < len(stack) <= max_nframe + assert size >= 1 # size depends on the object size + last_call = stack[0] + if last_call == DDFrame( + __file__, + _ALLOC_LINE_NUMBER, + "" if sys.version_info < (3, 12) else "_allocate_1k", + "", + ): + if thread_id == threading.main_thread().ident: + count_object += sample.count + if sys.version_info < (3, 12) and len(stack) > 1: + assert stack[1] == DDFrame(__file__, _ALLOC_LINE_NUMBER, "_allocate_1k", "") + elif thread_id == t.ident: + count_thread += sample.count + entry = 2 if sys.version_info < (3, 12) else 1 + if entry < len(stack): + assert stack[entry].file_name == threading.__file__ + assert stack[entry].lineno > 0 + assert stack[entry].function_name == "run" + + assert count_object >= 1000 + assert count_thread >= 1000 + + +def test_heap(): + max_nframe = 32 + collector = memalloc.MemoryCollector(max_nframe=max_nframe, heap_sample_size=1024) + with collector: + _test_heap_impl(collector, max_nframe) + + def test_heap_stress(): + from ddtrace.profiling.collector import _memalloc + # This should run for a few seconds, and is enough to spot potential segfaults. _memalloc.start(64, 1024) try: @@ -341,6 +1130,9 @@ def test_memalloc_speed(benchmark, heap_sample_size): ), ) def test_memalloc_sample_size(enabled, predicates, monkeypatch): + from ddtrace.internal.settings.profiling import ProfilingConfig + from ddtrace.internal.settings.profiling import _derive_default_heap_sample_size + monkeypatch.setenv("DD_PROFILING_HEAP_ENABLED", str(enabled).lower()) config = ProfilingConfig() diff --git a/tests/profiling/collector/test_stack.py b/tests/profiling/collector/test_stack.py index 4ad90aa8e9f..3342c76cf2a 100644 --- a/tests/profiling/collector/test_stack.py +++ b/tests/profiling/collector/test_stack.py @@ -1,9 +1,9 @@ -# -*- encoding: utf-8 -*- import _thread import os import sys import threading import time +from unittest.mock import patch import uuid import pytest @@ -13,8 +13,7 @@ from ddtrace.profiling.collector import stack from tests.conftest import get_original_test_name from tests.profiling.collector import pprof_utils - -from . import test_collector +from tests.profiling.collector import test_collector # Python 3.11.9 is not compatible with gevent, https://github.com/gevent/gevent/issues/2040 @@ -46,6 +45,7 @@ def func5(): return time.sleep(1) +# Use subprocess as ddup config persists across tests. 
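+# (Assumed mechanics of the marker, roughly: the decorated body is run in a
+# fresh interpreter with the given env applied before ddtrace is imported,
+# e.g. subprocess.run([sys.executable, "-c", body], env={**os.environ, **env}),
+# so ddup/libdatadog state cannot leak between tests.)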
@pytest.mark.subprocess( env=dict( DD_PROFILING_MAX_FRAMES="5", @@ -75,26 +75,317 @@ def test_collect_truncate(): samples = pprof_utils.get_samples_with_value_type(profile, "wall-time") assert len(samples) > 0 for sample in samples: - assert len(sample.location_id) <= max_nframes, len(sample.location_id) + # stack v2 adds one extra frame for "%d frames omitted" message + # Also, it allows max_nframes + 1 frames, so we add 2 here. + assert len(sample.location_id) <= max_nframes + 2, len(sample.location_id) + + +def test_stack_locations(tmp_path): + test_name = "test_stack_locations" + pprof_prefix = str(tmp_path / test_name) + output_filename = pprof_prefix + "." + str(os.getpid()) + + assert ddup.is_available + ddup.config(env="test", service=test_name, version="my_version", output_filename=pprof_prefix) + ddup.start() + + def baz(): + time.sleep(0.1) + + def bar(): + baz() + + def foo(): + bar() + + with stack.StackCollector(): + for _ in range(10): + foo() + ddup.upload() + + profile = pprof_utils.parse_newest_profile(output_filename) + samples = pprof_utils.get_samples_with_value_type(profile, "wall-time") + assert len(samples) > 0 + + expected_sample = pprof_utils.StackEvent( + thread_id=_thread.get_ident(), + thread_name="MainThread", + locations=[ + pprof_utils.StackLocation( + function_name="baz", + filename="test_stack.py", + line_no=baz.__code__.co_firstlineno + 1, + ), + pprof_utils.StackLocation( + function_name="bar", + filename="test_stack.py", + line_no=bar.__code__.co_firstlineno + 1, + ), + pprof_utils.StackLocation( + function_name="foo", + filename="test_stack.py", + line_no=foo.__code__.co_firstlineno + 1, + ), + ], + ) + + pprof_utils.assert_profile_has_sample(profile, samples=samples, expected_sample=expected_sample) + + +def test_push_span(tmp_path, tracer): + test_name = "test_push_span" + pprof_prefix = str(tmp_path / test_name) + output_filename = pprof_prefix + "." + str(os.getpid()) + + tracer._endpoint_call_counter_span_processor.enable() + + assert ddup.is_available + ddup.config(env="test", service=test_name, version="my_version", output_filename=pprof_prefix) + ddup.start() + + resource = str(uuid.uuid4()) + span_type = ext.SpanTypes.WEB + + with stack.StackCollector( + tracer=tracer, + ): + with tracer.trace("foobar", resource=resource, span_type=span_type) as span: + span_id = span.span_id + local_root_span_id = span._local_root.span_id + for _ in range(10): + time.sleep(0.1) + ddup.upload(tracer=tracer) + + profile = pprof_utils.parse_newest_profile(output_filename) + samples = pprof_utils.get_samples_with_label_key(profile, "span id") + assert len(samples) > 0 + for sample in samples: + pprof_utils.assert_stack_event( + profile, + sample, + expected_event=pprof_utils.StackEvent( + span_id=span_id, + local_root_span_id=local_root_span_id, + trace_type=span_type, + trace_endpoint=resource, + ), + ) + + +def test_push_span_unregister_thread(tmp_path, monkeypatch, tracer): + with patch("ddtrace.internal.datadog.profiling.stack_v2.unregister_thread") as unregister_thread: + tracer._endpoint_call_counter_span_processor.enable() + + test_name = "test_push_span_unregister_thread" + pprof_prefix = str(tmp_path / test_name) + output_filename = pprof_prefix + "." 
+ str(os.getpid()) + + assert ddup.is_available + ddup.config(env="test", service=test_name, version="my_version", output_filename=pprof_prefix) + ddup.start() + + resource = str(uuid.uuid4()) + span_type = ext.SpanTypes.WEB + + def target_fun(): + for _ in range(10): + time.sleep(0.1) + + with stack.StackCollector( + tracer=tracer, + ): + with tracer.trace("foobar", resource=resource, span_type=span_type) as span: + span_id = span.span_id + local_root_span_id = span._local_root.span_id + t = threading.Thread(target=target_fun) + t.start() + t.join() + thread_id = t.ident + ddup.upload(tracer=tracer) + + profile = pprof_utils.parse_newest_profile(output_filename) + samples = pprof_utils.get_samples_with_label_key(profile, "span id") + assert len(samples) > 0 + for sample in samples: + pprof_utils.assert_stack_event( + profile, + sample, + expected_event=pprof_utils.StackEvent( + span_id=span_id, + local_root_span_id=local_root_span_id, + trace_type=span_type, + trace_endpoint=resource, + ), + ) + + unregister_thread.assert_called_with(thread_id) + + +def test_push_non_web_span(tmp_path, tracer): + tracer._endpoint_call_counter_span_processor.enable() + + test_name = "test_push_non_web_span" + pprof_prefix = str(tmp_path / test_name) + output_filename = pprof_prefix + "." + str(os.getpid()) + + assert ddup.is_available + ddup.config(env="test", service=test_name, version="my_version", output_filename=pprof_prefix) + ddup.start() + + resource = str(uuid.uuid4()) + span_type = ext.SpanTypes.SQL + + with stack.StackCollector( + tracer=tracer, + ): + with tracer.trace("foobar", resource=resource, span_type=span_type) as span: + span_id = span.span_id + local_root_span_id = span._local_root.span_id + for _ in range(10): + time.sleep(0.1) + ddup.upload(tracer=tracer) + + profile = pprof_utils.parse_newest_profile(output_filename) + samples = pprof_utils.get_samples_with_label_key(profile, "span id") + assert len(samples) > 0 + for sample in samples: + pprof_utils.assert_stack_event( + profile, + sample, + expected_event=pprof_utils.StackEvent( + span_id=span_id, + local_root_span_id=local_root_span_id, + trace_type=span_type, + # trace_endpoint is not set for non-web spans + ), + ) + + +def test_push_span_none_span_type(tmp_path, tracer): + # Test for https://github.com/DataDog/dd-trace-py/issues/11141 + test_name = "test_push_span_none_span_type" + pprof_prefix = str(tmp_path / test_name) + output_filename = pprof_prefix + "." + str(os.getpid()) + + assert ddup.is_available + ddup.config(env="test", service=test_name, version="my_version", output_filename=pprof_prefix) + ddup.start() + + tracer._endpoint_call_counter_span_processor.enable() + + resource = str(uuid.uuid4()) + + with stack.StackCollector( + tracer=tracer, + ): + # Explicitly set None span_type as the default could change in the + # future. 
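+        # For reference, span_type=None matches tracer.trace()'s current
+        # default, so this is equivalent to tracer.trace("foobar",
+        # resource=resource); passing it explicitly keeps the regression
+        # covered even if the default changes.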
+ with tracer.trace("foobar", resource=resource, span_type=None) as span: + span_id = span.span_id + local_root_span_id = span._local_root.span_id + for _ in range(10): + time.sleep(0.1) + ddup.upload(tracer=tracer) + + profile = pprof_utils.parse_newest_profile(output_filename) + samples = pprof_utils.get_samples_with_label_key(profile, "span id") + assert len(samples) > 0 + for sample in samples: + pprof_utils.assert_stack_event( + profile, + sample, + expected_event=pprof_utils.StackEvent( + span_id=span_id, + local_root_span_id=local_root_span_id, + # span_type is None + # trace_endpoint is not set for non-web spans + ), + ) + + +def test_exception_collection(tmp_path): + test_name = "test_exception_collection" + pprof_prefix = str(tmp_path / test_name) + output_filename = pprof_prefix + "." + str(os.getpid()) + + assert ddup.is_available + ddup.config(env="test", service=test_name, version="my_version", output_filename=pprof_prefix) + ddup.start() + + with stack.StackCollector(): + try: + raise ValueError("hello") + except Exception: + time.sleep(1) + ddup.upload() + + profile = pprof_utils.parse_newest_profile(output_filename) + samples = pprof_utils.get_samples_with_label_key(profile, "exception type") -def _find_sleep_event(events, class_name): - class_method_found = False - class_classmethod_found = False + # DEV: update the test once we have exception profiling for stack v2 + # using echion + assert len(samples) == 0 - for e in events: - for frame in e.frames: - if frame[0] == __file__.replace(".pyc", ".py") and frame[2] == "sleep_class" and frame[3] == class_name: - class_method_found = True - elif ( - frame[0] == __file__.replace(".pyc", ".py") and frame[2] == "sleep_instance" and frame[3] == class_name - ): - class_classmethod_found = True - if class_method_found and class_classmethod_found: - return True +def test_exception_collection_threads(tmp_path): + test_name = "test_exception_collection_threads" + pprof_prefix = str(tmp_path / test_name) + output_filename = pprof_prefix + "." + str(os.getpid()) - return False + assert ddup.is_available + ddup.config(env="test", service=test_name, version="my_version", output_filename=pprof_prefix) + ddup.start() + + with stack.StackCollector(): + + def target_fun(): + try: + raise ValueError("hello") + except Exception: + time.sleep(1) + + threads = [] + for _ in range(10): + t = threading.Thread(target=target_fun) + threads.append(t) + t.start() + + for t in threads: + t.join() + + ddup.upload() + + profile = pprof_utils.parse_newest_profile(output_filename) + samples = pprof_utils.get_samples_with_label_key(profile, "exception type") + + assert len(samples) == 0 + + +def test_exception_collection_trace(tmp_path, tracer): + test_name = "test_exception_collection_trace" + pprof_prefix = str(tmp_path / test_name) + output_filename = pprof_prefix + "." 
+ str(os.getpid()) + + tracer._endpoint_call_counter_span_processor.enable() + + assert ddup.is_available + ddup.config(env="test", service=test_name, version="my_version", output_filename=pprof_prefix) + ddup.start() + + with stack.StackCollector(tracer=tracer): + with tracer.trace("foobar", resource="resource", span_type=ext.SpanTypes.WEB): + try: + raise ValueError("hello") + except Exception: + time.sleep(1) + + ddup.upload(tracer=tracer) + + profile = pprof_utils.parse_newest_profile(output_filename) + samples = pprof_utils.get_samples_with_label_key(profile, "exception type") + + assert len(samples) == 0 def test_collect_once_with_class(tmp_path): @@ -217,7 +508,9 @@ def _fib(n): @pytest.mark.skipif(not TESTING_GEVENT, reason="Not testing gevent") @pytest.mark.subprocess(ddtrace_run=True) def test_collect_gevent_thread_task(): - from gevent import monkey # noqa:F401 + # TODO(taegyunkim): update echion to support gevent and test with stack v2 + + from gevent import monkey monkey.patch_all() @@ -240,15 +533,19 @@ def test_collect_gevent_thread_task(): # Start some (green)threads def _dofib(): - for _ in range(10): + for _ in range(5): # spend some time in CPU so the profiler can catch something - _fib(28) + # On a Mac w/ Apple M3 MAX with Python 3.11 it takes about 200ms to calculate _fib(32) + # And _fib() is called 5 times so it should take about 1 second + # We use 5 threads below so it should take about 5 seconds + _fib(32) # Just make sure gevent switches threads/greenlets time.sleep(0) threads = [] + with stack.StackCollector(): - for i in range(10): + for i in range(5): t = threading.Thread(target=_dofib, name="TestThread %d" % i) t.start() threads.append(t) @@ -257,29 +554,37 @@ def _dofib(): ddup.upload() - expected_task_ids = {thread.ident for thread in threads} - profile = pprof_utils.parse_newest_profile(output_filename) - samples = pprof_utils.get_samples_with_label_key(profile, "task id") + samples = pprof_utils.get_samples_with_label_key(profile, "task name") assert len(samples) > 0 - checked_thread = False - - for sample in samples: - task_id_label = pprof_utils.get_label_with_key(profile.string_table, sample, "task id") - task_id = int(task_id_label.num) - if task_id in expected_task_ids: - pprof_utils.assert_stack_event( - profile, - sample, - pprof_utils.StackEvent( - task_name=r"TestThread \d+$", - task_id=task_id, + pprof_utils.assert_profile_has_sample( + profile, + samples, + expected_sample=pprof_utils.StackEvent( + thread_name="MainThread", + task_name=r"Greenlet-\d+$", + locations=[ + # Since we're using recursive function _fib(), we expect to have + # multiple locations for _fib(n) = _fib(n-1) + _fib(n-2) + pprof_utils.StackLocation( + filename="test_stack.py", + function_name="_fib", + line_no=_fib.__code__.co_firstlineno + 6, ), - ) - checked_thread = True - - assert checked_thread, "No samples found for the expected threads" + pprof_utils.StackLocation( + filename="test_stack.py", + function_name="_fib", + line_no=_fib.__code__.co_firstlineno + 6, + ), + pprof_utils.StackLocation( + filename="test_stack.py", + function_name="_fib", + line_no=_fib.__code__.co_firstlineno + 6, + ), + ], + ), + ) def test_repr(): @@ -289,6 +594,7 @@ def test_repr(): ) +# Tests from tests/profiling/collector/test_stack.py (v1) # Function to use for stress-test of polling MAX_FN_NUM = 30 FN_TEMPLATE = """def _f{num}(): @@ -541,79 +847,3 @@ def _trace(): for t in threads: t.join() - - -@pytest.mark.skipif(not TESTING_GEVENT or sys.version_info < (3, 9), reason="Not testing 
gevent") -@pytest.mark.subprocess(ddtrace_run=True) -def test_collect_gevent_threads(): - import gevent.monkey - - gevent.monkey.patch_all() - - import os - import threading - import time - - from ddtrace.internal.datadog.profiling import ddup - from ddtrace.profiling.collector import stack - from tests.profiling.collector import pprof_utils - - iteration = 100 - sleep_time = 0.01 - nb_threads = 15 - - # Start some greenthreads: they do nothing we just keep switching between them. - def _nothing(): - for _ in range(iteration): - # Do nothing and just switch to another greenlet - time.sleep(sleep_time) - - test_name = "test_collect_gevent_threads" - pprof_prefix = "/tmp/" + test_name - output_filename = pprof_prefix + "." + str(os.getpid()) - - assert ddup.is_available - ddup.config(env="test", service="test_collect_gevent_threads", version="my_version", output_filename=pprof_prefix) - ddup.start() - - with stack.StackCollector(): - threads = [] - i_to_tid = {} - for i in range(nb_threads): - t = threading.Thread(target=_nothing, name="TestThread %d" % i) - i_to_tid[i] = t.ident - t.start() - threads.append(t) - for t in threads: - t.join() - - ddup.upload() - - profile = pprof_utils.parse_newest_profile(output_filename) - samples = pprof_utils.get_samples_with_label_key(profile, "task name") - assert len(samples) > 0 - - for task_id in range(nb_threads): - pprof_utils.assert_profile_has_sample( - profile, - samples, - expected_sample=pprof_utils.StackEvent( - task_name="TestThread %d" % task_id, - task_id=i_to_tid[task_id], - thread_id=i_to_tid[task_id], - locations=[ - pprof_utils.StackLocation( - filename="test_stack.py", - function_name="_nothing", - line_no=_nothing.__code__.co_firstlineno + 3, - ) - ], - ), - ) - pprof_utils.assert_profile_has_sample( - profile, - samples, - expected_sample=pprof_utils.StackEvent( - task_name="MainThread", - ), - ) diff --git a/tests/profiling/collector/test_stack_asyncio.py b/tests/profiling/collector/test_stack_asyncio.py index f7d4ebd6cac..a13ea2aaad9 100644 --- a/tests/profiling/collector/test_stack_asyncio.py +++ b/tests/profiling/collector/test_stack_asyncio.py @@ -7,6 +7,7 @@ ), err=None, ) +# For macOS: err=None ignores expected stderr from tracer failing to connect to agent (not relevant to this test) def test_asyncio(): import asyncio import os @@ -14,10 +15,13 @@ def test_asyncio(): import uuid from ddtrace import ext + from ddtrace.internal.datadog.profiling import stack_v2 from ddtrace.profiling import profiler from ddtrace.trace import tracer from tests.profiling.collector import pprof_utils + assert stack_v2.is_available, stack_v2.failure_msg + sleep_time = 0.2 loop_run_time = 3 @@ -37,7 +41,10 @@ async def hello(): p = profiler.Profiler(tracer=tracer) p.start() - with tracer.trace("test_asyncio", resource=resource, span_type=span_type): + with tracer.trace("test_asyncio", resource=resource, span_type=span_type) as span: + span_id = span.span_id + local_root_span_id = span._local_root.span_id + loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) maintask = loop.create_task(hello(), name="main") @@ -60,6 +67,8 @@ async def hello(): # get samples with task_name samples = pprof_utils.get_samples_with_label_key(profile, "task name") + # The next fails if stack_v2 is not properly configured with asyncio task + # tracking via ddtrace.profiling._asyncio assert len(samples) > 0 pprof_utils.assert_profile_has_sample( @@ -68,10 +77,8 @@ async def hello(): expected_sample=pprof_utils.StackEvent( thread_name="MainThread", task_name="main", 
- # Noticed that these are not set in Stack v1 as it doesn't propagate - # span correlation information. Stack v2 does. - # span_id=span_id, - # local_root_span_id=local_root_span_id, + span_id=span_id, + local_root_span_id=local_root_span_id, locations=[ pprof_utils.StackLocation( function_name="hello", filename="test_stack_asyncio.py", line_no=hello.__code__.co_firstlineno + 3 @@ -86,8 +93,8 @@ async def hello(): expected_sample=pprof_utils.StackEvent( thread_name="MainThread", task_name=t1_name, - # span_id=span_id, - # local_root_span_id=local_root_span_id, + span_id=span_id, + local_root_span_id=local_root_span_id, locations=[ pprof_utils.StackLocation( function_name="stuff", filename="test_stack_asyncio.py", line_no=stuff.__code__.co_firstlineno + 3 @@ -102,8 +109,8 @@ async def hello(): expected_sample=pprof_utils.StackEvent( thread_name="MainThread", task_name=t2_name, - # span_id=span_id, - # local_root_span_id=local_root_span_id, + span_id=span_id, + local_root_span_id=local_root_span_id, locations=[ pprof_utils.StackLocation( function_name="stuff", filename="test_stack_asyncio.py", line_no=stuff.__code__.co_firstlineno + 3 @@ -111,3 +118,755 @@ async def hello(): ], ), ) + + +@pytest.mark.subprocess( + env=dict( + DD_PROFILING_OUTPUT_PPROF="/tmp/test_asyncio_start_profiler_from_process_before_importing_asyncio", + ), + err=None, +) +# For macOS: err=None ignores expected stderr from tracer failing to connect to agent (not relevant to this test) +def test_asyncio_start_profiler_from_process_before_importing_asyncio(): + from ddtrace.internal.datadog.profiling import stack_v2 + from ddtrace.profiling import profiler + + assert stack_v2.is_available, stack_v2.failure_msg + + p = profiler.Profiler() + p.start() + + import asyncio + import os + import sys + import time + + # Start an asyncio loop BEFORE importing profiler modules + # This simulates the bug scenario where a loop exists before profiling is enabled + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + async def my_function(): + async def background_task_func() -> None: + """Background task that runs in the existing loop.""" + await asyncio.sleep(1.5) + + # Create and start a task in the existing loop + background_task = loop.create_task(background_task_func(), name="background") + assert background_task is not None + + # Run tasks that should be tracked + sleep_time = 0.2 + loop_run_time = 0.75 + + async def tracked_task() -> None: + start_time = time.time() + while time.time() < start_time + loop_run_time: + await asyncio.sleep(sleep_time) + + async def main_task(): + t1 = asyncio.create_task(tracked_task(), name="tracked 1") + t2 = asyncio.create_task(tracked_task(), name="tracked 2") + await tracked_task() + await asyncio.sleep(0.25) + return t1, t2 + + result = await main_task() + + await background_task + + return tracked_task, background_task_func, result + + main_task = loop.create_task(my_function(), name="main") + tracked_task_def, background_task_def, (t1, t2) = loop.run_until_complete(main_task) + + p.stop() + + t1_name = t1.get_name() + t2_name = t2.get_name() + + assert t1_name == "tracked 1" + assert t2_name == "tracked 2" + + from tests.profiling.collector import pprof_utils + + output_filename = os.environ["DD_PROFILING_OUTPUT_PPROF"] + "." + str(os.getpid()) + profile = pprof_utils.parse_newest_profile(output_filename) + + samples = pprof_utils.get_samples_with_label_key(profile, "task name") + assert len(samples) > 0, "No task names found - existing loop was not tracked!" 
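+    # On 3.11+ a nested coroutine's frame reports its qualified name from
+    # co_qualname, e.g. "my_function.<locals>.background_task_func", while
+    # older versions report only the bare function name; the expectations
+    # below handle both cases.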
+    if sys.version_info >= (3, 11):
+        EXPECTED_FUNCTION_NAME_BACKGROUND = f"{my_function.__name__}.<locals>.{background_task_def.__name__}"
+    else:
+        EXPECTED_FUNCTION_NAME_BACKGROUND = background_task_def.__name__
+    EXPECTED_FILENAME_BACKGROUND = os.path.basename(background_task_def.__code__.co_filename)
+    EXPECTED_LINE_NO_BACKGROUND = background_task_def.__code__.co_firstlineno + 2
+
+    pprof_utils.assert_profile_has_sample(
+        profile,
+        samples,
+        expected_sample=pprof_utils.StackEvent(
+            thread_name="MainThread",
+            task_name="background",
+            locations=[
+                pprof_utils.StackLocation(
+                    function_name=EXPECTED_FUNCTION_NAME_BACKGROUND,
+                    filename=EXPECTED_FILENAME_BACKGROUND,
+                    line_no=EXPECTED_LINE_NO_BACKGROUND,
+                ),
+            ],
+        ),
+    )
+
+    # Verify specific tasks are in the profile
+    if sys.version_info >= (3, 11):
+        EXPECTED_FUNCTION_NAME_TRACKED = f"{my_function.__name__}.<locals>.{tracked_task_def.__name__}"
+    else:
+        EXPECTED_FUNCTION_NAME_TRACKED = tracked_task_def.__name__
+    EXPECTED_FILENAME_TRACKED = os.path.basename(tracked_task_def.__code__.co_filename)
+    EXPECTED_LINE_NO_TRACKED = tracked_task_def.__code__.co_firstlineno + 3
+
+    pprof_utils.assert_profile_has_sample(
+        profile,
+        samples,
+        expected_sample=pprof_utils.StackEvent(
+            thread_name="MainThread",
+            task_name=t1_name,
+            locations=[
+                pprof_utils.StackLocation(
+                    function_name=EXPECTED_FUNCTION_NAME_TRACKED,
+                    filename=EXPECTED_FILENAME_TRACKED,
+                    line_no=EXPECTED_LINE_NO_TRACKED,
+                )
+            ],
+        ),
+    )
+
+
+@pytest.mark.subprocess(
+    env=dict(
+        DD_PROFILING_OUTPUT_PPROF="/tmp/test_asyncio_start_profiler_from_process_before_starting_loop",
+    ),
+    err=None,
+)
+# For macOS: err=None ignores expected stderr from tracer failing to connect to agent (not relevant to this test)
+def test_asyncio_start_profiler_from_process_before_starting_loop():
+    import asyncio
+    import os
+    import sys
+    import time
+
+    from ddtrace.internal.datadog.profiling import stack_v2
+    from ddtrace.profiling import profiler
+
+    assert stack_v2.is_available, stack_v2.failure_msg
+
+    p = profiler.Profiler()
+    p.start()
+
+    # Create the asyncio loop AFTER the profiler has started
+    # This checks that a loop created after profiling is enabled is still tracked
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+
+    async def my_function():
+        async def background_task_func() -> None:
+            """Background task that runs in the existing loop."""
+            await asyncio.sleep(1.5)
+
+        # Create and start a task in the existing loop
+        background_task = loop.create_task(background_task_func(), name="background")
+        assert background_task is not None
+
+        # Run tasks that should be tracked
+        sleep_time = 0.2
+        loop_run_time = 0.75
+
+        async def tracked_task() -> None:
+            start_time = time.time()
+            while time.time() < start_time + loop_run_time:
+                await asyncio.sleep(sleep_time)
+
+        async def main_task():
+            t1 = asyncio.create_task(tracked_task(), name="tracked 1")
+            t2 = asyncio.create_task(tracked_task(), name="tracked 2")
+            await tracked_task()
+            await asyncio.sleep(0.25)
+            return t1, t2
+
+        result = await main_task()
+
+        await background_task
+
+        return tracked_task, background_task_func, result
+
+    main_task = loop.create_task(my_function(), name="main")
+    tracked_task_def, background_task_def, (t1, t2) = loop.run_until_complete(main_task)
+
+    p.stop()
+
+    t1_name = t1.get_name()
+    t2_name = t2.get_name()
+
+    assert t1_name == "tracked 1"
+    assert t2_name == "tracked 2"
+
+    from tests.profiling.collector import pprof_utils
+
+    output_filename = os.environ["DD_PROFILING_OUTPUT_PPROF"] + "." + str(os.getpid())
+    profile = pprof_utils.parse_newest_profile(output_filename)
+
+    samples = pprof_utils.get_samples_with_label_key(profile, "task name")
+    assert len(samples) > 0, "No task names found - existing loop was not tracked!"
+
+    if sys.version_info >= (3, 11):
+        EXPECTED_FUNCTION_NAME_BACKGROUND = f"{my_function.__name__}.<locals>.{background_task_def.__name__}"
+    else:
+        EXPECTED_FUNCTION_NAME_BACKGROUND = background_task_def.__name__
+    EXPECTED_FILENAME_BACKGROUND = os.path.basename(background_task_def.__code__.co_filename)
+    EXPECTED_LINE_NO_BACKGROUND = background_task_def.__code__.co_firstlineno + 2
+
+    pprof_utils.assert_profile_has_sample(
+        profile,
+        samples,
+        expected_sample=pprof_utils.StackEvent(
+            thread_name="MainThread",
+            task_name="background",
+            locations=[
+                pprof_utils.StackLocation(
+                    function_name=EXPECTED_FUNCTION_NAME_BACKGROUND,
+                    filename=EXPECTED_FILENAME_BACKGROUND,
+                    line_no=EXPECTED_LINE_NO_BACKGROUND,
+                ),
+            ],
+        ),
+    )
+
+    # Verify specific tasks are in the profile
+    if sys.version_info >= (3, 11):
+        EXPECTED_FUNCTION_NAME_TRACKED = f"{my_function.__name__}.<locals>.{tracked_task_def.__name__}"
+    else:
+        EXPECTED_FUNCTION_NAME_TRACKED = tracked_task_def.__name__
+    EXPECTED_FILENAME_TRACKED = os.path.basename(tracked_task_def.__code__.co_filename)
+    EXPECTED_LINE_NO_TRACKED = tracked_task_def.__code__.co_firstlineno + 3
+
+    pprof_utils.assert_profile_has_sample(
+        profile,
+        samples,
+        expected_sample=pprof_utils.StackEvent(
+            thread_name="MainThread",
+            task_name=t1_name,
+            locations=[
+                pprof_utils.StackLocation(
+                    function_name=EXPECTED_FUNCTION_NAME_TRACKED,
+                    filename=EXPECTED_FILENAME_TRACKED,
+                    line_no=EXPECTED_LINE_NO_TRACKED,
+                )
+            ],
+        ),
+    )
+
+
+@pytest.mark.xfail(reason="This test fails because there's no way to get the current loop if it's not already running.")
+@pytest.mark.subprocess(
+    env=dict(
+        DD_PROFILING_OUTPUT_PPROF="/tmp/test_asyncio_start_profiler_from_process_after_creating_loop",
+    ),
+    err=None,
+)
+# For macOS: err=None ignores expected stderr from tracer failing to connect to agent (not relevant to this test)
+def test_asyncio_start_profiler_from_process_after_creating_loop():
+    import asyncio
+    import os
+    import sys
+    import time
+
+    from ddtrace.internal.datadog.profiling import stack_v2
+    from ddtrace.profiling import profiler
+
+    # Create the asyncio loop BEFORE starting the profiler
+    # This simulates the bug scenario where a loop exists before profiling is enabled
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+
+    assert stack_v2.is_available, stack_v2.failure_msg
+
+    p = profiler.Profiler()
+    p.start()
+
+    async def my_function():
+        async def background_task_func() -> None:
+            """Background task that runs in the existing loop."""
+            await asyncio.sleep(1.5)
+
+        # Create and start a task in the existing loop
+        background_task = loop.create_task(background_task_func(), name="background")
+        assert background_task is not None
+
+        # Run tasks that should be tracked
+        sleep_time = 0.2
+        loop_run_time = 0.75
+
+        async def tracked_task() -> None:
+            start_time = time.time()
+            while time.time() < start_time + loop_run_time:
+                await asyncio.sleep(sleep_time)
+
+        async def main_task():
+            t1 = asyncio.create_task(tracked_task(), name="tracked 1")
+            t2 = asyncio.create_task(tracked_task(), name="tracked 2")
+            await tracked_task()
+            await asyncio.sleep(0.25)
+            return t1, t2
+
+        result = await main_task()
+
+        await background_task
+
+        return tracked_task, background_task_func, result
+
+    main_task = loop.create_task(my_function(), name="main")
+    tracked_task_def, background_task_def, (t1, t2) = loop.run_until_complete(main_task)
+
+    p.stop()
+
+    t1_name = t1.get_name()
+    t2_name = t2.get_name()
+
+    assert t1_name == "tracked 1"
+    assert t2_name == "tracked 2"
+
+    from tests.profiling.collector import pprof_utils
+
+    output_filename = os.environ["DD_PROFILING_OUTPUT_PPROF"] + "." + str(os.getpid())
+    profile = pprof_utils.parse_newest_profile(output_filename)
+
+    samples = pprof_utils.get_samples_with_label_key(profile, "task name")
+    assert len(samples) > 0, "No task names found - existing loop was not tracked!"
+
+    EXPECTED_FILENAME_BACKGROUND = os.path.basename(background_task_def.__code__.co_filename)
+    EXPECTED_LINE_NO_BACKGROUND = background_task_def.__code__.co_firstlineno + 2
+    if sys.version_info >= (3, 11):
+        EXPECTED_FUNCTION_NAME_BACKGROUND = f"{my_function.__name__}.<locals>.{background_task_def.__name__}"
+    else:
+        EXPECTED_FUNCTION_NAME_BACKGROUND = background_task_def.__name__
+
+    pprof_utils.assert_profile_has_sample(
+        profile,
+        samples,
+        expected_sample=pprof_utils.StackEvent(
+            thread_name="MainThread",
+            task_name="background",
+            locations=[
+                pprof_utils.StackLocation(
+                    function_name=EXPECTED_FUNCTION_NAME_BACKGROUND,
+                    filename=EXPECTED_FILENAME_BACKGROUND,
+                    line_no=EXPECTED_LINE_NO_BACKGROUND,
+                ),
+            ],
+        ),
+    )
+
+    # Verify specific tasks are in the profile
+    EXPECTED_FILENAME_TRACKED = os.path.basename(tracked_task_def.__code__.co_filename)
+    EXPECTED_LINE_NO_TRACKED = tracked_task_def.__code__.co_firstlineno + 3
+    if sys.version_info >= (3, 11):
+        EXPECTED_FUNCTION_NAME_TRACKED = f"{my_function.__name__}.<locals>.{tracked_task_def.__name__}"
+    else:
+        EXPECTED_FUNCTION_NAME_TRACKED = tracked_task_def.__name__
+
+    pprof_utils.assert_profile_has_sample(
+        profile,
+        samples,
+        expected_sample=pprof_utils.StackEvent(
+            thread_name="MainThread",
+            task_name=t1_name,
+            locations=[
+                pprof_utils.StackLocation(
+                    function_name=EXPECTED_FUNCTION_NAME_TRACKED,
+                    filename=EXPECTED_FILENAME_TRACKED,
+                    line_no=EXPECTED_LINE_NO_TRACKED,
+                )
+            ],
+        ),
+    )
+
+
+@pytest.mark.xfail(reason="This test fails because there's no way to get the current loop if it's not already running.")
+@pytest.mark.subprocess(
+    env=dict(
+        DD_PROFILING_OUTPUT_PPROF="/tmp/test_asyncio_import_profiler_from_process_after_starting_loop",
+    ),
+    err=None,
+)
+# For macOS: err=None ignores expected stderr from tracer failing to connect to agent (not relevant to this test)
+def test_asyncio_import_profiler_from_process_after_starting_loop():
+    import asyncio
+    import os
+    import sys
+    import time
+
+    # Start an asyncio loop BEFORE importing profiler modules
+    # This simulates the bug scenario where a loop exists before profiling is enabled
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+
+    from ddtrace.internal.datadog.profiling import stack_v2
+    from ddtrace.profiling import profiler
+
+    assert stack_v2.is_available, stack_v2.failure_msg
+
+    p = profiler.Profiler()
+    p.start()
+
+    async def my_function():
+        async def background_task_func() -> None:
+            """Background task that runs in the existing loop."""
+            await asyncio.sleep(1.5)
+
+        # Create and start a task in the existing loop
+        background_task = loop.create_task(background_task_func(), name="background")
+        assert background_task is not None
+
+        # Run tasks that should be tracked
+        sleep_time = 0.2
+        loop_run_time = 0.75
+
+        async def tracked_task() -> None:
+            start_time = time.time()
+            while time.time() < start_time + loop_run_time:
+                await asyncio.sleep(sleep_time)
+
+        async def main_task():
+            t1 = asyncio.create_task(tracked_task(), name="tracked 1")
+            t2 = asyncio.create_task(tracked_task(), name="tracked 2")
+            await tracked_task()
+            await asyncio.sleep(0.25)
+            return t1, t2
+
+        result = await main_task()
+
+        await background_task
+
+        return tracked_task, background_task_func, result
+
+    main_task = loop.create_task(my_function(), name="main")
+    tracked_task_def, background_task_def, (t1, t2) = loop.run_until_complete(main_task)
+
+    p.stop()
+
+    t1_name = t1.get_name()
+    t2_name = t2.get_name()
+
+    assert t1_name == "tracked 1"
+    assert t2_name == "tracked 2"
+
+    from tests.profiling.collector import pprof_utils
+
+    output_filename = os.environ["DD_PROFILING_OUTPUT_PPROF"] + "." + str(os.getpid())
+    profile = pprof_utils.parse_newest_profile(output_filename)
+
+    samples = pprof_utils.get_samples_with_label_key(profile, "task name")
+    assert len(samples) > 0, "No task names found - existing loop was not tracked!"
+
+    EXPECTED_FILENAME_BACKGROUND = os.path.basename(background_task_def.__code__.co_filename)
+    EXPECTED_LINE_NO_BACKGROUND = background_task_def.__code__.co_firstlineno
+    if sys.version_info >= (3, 11):
+        EXPECTED_FUNCTION_NAME_BACKGROUND = f"{my_function.__name__}.<locals>.{background_task_def.__name__}"
+    else:
+        EXPECTED_FUNCTION_NAME_BACKGROUND = background_task_def.__name__
+
+    pprof_utils.assert_profile_has_sample(
+        profile,
+        samples,
+        expected_sample=pprof_utils.StackEvent(
+            thread_name="MainThread",
+            task_name="background",
+            locations=[
+                pprof_utils.StackLocation(
+                    function_name=EXPECTED_FUNCTION_NAME_BACKGROUND,
+                    filename=EXPECTED_FILENAME_BACKGROUND,
+                    line_no=EXPECTED_LINE_NO_BACKGROUND,
+                ),
+            ],
+        ),
+    )
+
+    # Verify specific tasks are in the profile
+    EXPECTED_FILENAME_TRACKED = os.path.basename(tracked_task_def.__code__.co_filename)
+    EXPECTED_LINE_NO_TRACKED = tracked_task_def.__code__.co_firstlineno + 3
+    if sys.version_info >= (3, 11):
+        EXPECTED_FUNCTION_NAME_TRACKED = f"{my_function.__name__}.<locals>.{tracked_task_def.__name__}"
+    else:
+        EXPECTED_FUNCTION_NAME_TRACKED = tracked_task_def.__name__
+
+    pprof_utils.assert_profile_has_sample(
+        profile,
+        samples,
+        expected_sample=pprof_utils.StackEvent(
+            thread_name="MainThread",
+            task_name=t1_name,
+            locations=[
+                pprof_utils.StackLocation(
+                    function_name=EXPECTED_FUNCTION_NAME_TRACKED,
+                    filename=EXPECTED_FILENAME_TRACKED,
+                    line_no=EXPECTED_LINE_NO_TRACKED,
+                )
+            ],
+        ),
+    )
+
+
+@pytest.mark.subprocess(
+    env=dict(
+        DD_PROFILING_OUTPUT_PPROF="/tmp/test_asyncio_start_profiler_from_process_after_creating_loop_and_task",
+    ),
+    err=None,
+)
+def test_asyncio_start_profiler_from_process_after_task_start():
+    import asyncio
+    import os
+    import sys
+    import time
+
+    from ddtrace.internal.datadog.profiling import stack_v2
+    from ddtrace.profiling import profiler
+
+    # Create the asyncio loop and start a task in it BEFORE starting the profiler
+    # This simulates the bug scenario where a loop exists before profiling is enabled
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+
+    async def my_function():
+        async def background_task_func() -> None:
+            """Background task that runs in the existing loop."""
+            await asyncio.sleep(1.5)
+
+        # Create and start a task in the existing loop
+        background_task = loop.create_task(background_task_func(), name="background")
+        assert background_task is not None
+
+        # Start profiler after loop is already running
+        assert asyncio.get_running_loop() is loop
+
+        assert stack_v2.is_available, stack_v2.failure_msg
+
+        p = profiler.Profiler()
+        p.start()
+
+        # Run tasks that should be tracked
+        sleep_time = 0.2
+        loop_run_time = 0.75
+
+        async def tracked_task() -> None:
+            start_time = time.time()
+            while time.time() < start_time + loop_run_time:
+                await asyncio.sleep(sleep_time)
+
+        async def main_task():
+            t1 = asyncio.create_task(tracked_task(), name="tracked 1")
+            t2 = asyncio.create_task(tracked_task(), name="tracked 2")
+            await tracked_task()
+            await asyncio.sleep(0.25)
+            return t1, t2
+
+        result = await main_task()
+
+        await background_task
+
+        return tracked_task, background_task_func, p, result
+
+    main_task = loop.create_task(my_function(), name="main")
+    tracked_task_def, background_task_def, p, (t1, t2) = loop.run_until_complete(main_task)
+
+    p.stop()
+
+    t1_name = t1.get_name()
+    t2_name = t2.get_name()
+
+    assert t1_name == "tracked 1"
+    assert t2_name == "tracked 2"
+
+    from tests.profiling.collector import pprof_utils
+
+    output_filename = os.environ["DD_PROFILING_OUTPUT_PPROF"] + "." + str(os.getpid())
+    profile = pprof_utils.parse_newest_profile(output_filename)
+
+    samples = pprof_utils.get_samples_with_label_key(profile, "task name")
+    assert len(samples) > 0, "No task names found - existing loop was not tracked!"
+
+    EXPECTED_FILENAME_BACKGROUND = os.path.basename(background_task_def.__code__.co_filename)
+    EXPECTED_LINE_NO_BACKGROUND = background_task_def.__code__.co_firstlineno
+    if sys.version_info >= (3, 11):
+        EXPECTED_FUNCTION_NAME_BACKGROUND = f"{my_function.__name__}.<locals>.{background_task_def.__name__}"
+    else:
+        EXPECTED_FUNCTION_NAME_BACKGROUND = background_task_def.__name__
+
+    pprof_utils.assert_profile_has_sample(
+        profile,
+        samples,
+        expected_sample=pprof_utils.StackEvent(
+            thread_name="MainThread",
+            task_name="background",
+            locations=[
+                pprof_utils.StackLocation(
+                    function_name=EXPECTED_FUNCTION_NAME_BACKGROUND,
+                    filename=EXPECTED_FILENAME_BACKGROUND,
+                    line_no=EXPECTED_LINE_NO_BACKGROUND,
+                ),
+            ],
+        ),
+    )
+
+    # Verify specific tasks are in the profile
+    EXPECTED_FILENAME_TRACKED = os.path.basename(tracked_task_def.__code__.co_filename)
+    EXPECTED_LINE_NO_TRACKED = tracked_task_def.__code__.co_firstlineno + 3
+    if sys.version_info >= (3, 11):
+        EXPECTED_FUNCTION_NAME_TRACKED = f"{my_function.__name__}.<locals>.{tracked_task_def.__name__}"
+    else:
+        EXPECTED_FUNCTION_NAME_TRACKED = tracked_task_def.__name__
+
+    pprof_utils.assert_profile_has_sample(
+        profile,
+        samples,
+        expected_sample=pprof_utils.StackEvent(
+            thread_name="MainThread",
+            task_name=t1_name,
+            locations=[
+                pprof_utils.StackLocation(
+                    function_name=EXPECTED_FUNCTION_NAME_TRACKED,
+                    filename=EXPECTED_FILENAME_TRACKED,
+                    line_no=EXPECTED_LINE_NO_TRACKED,
+                )
+            ],
+        ),
+    )
+
+
+@pytest.mark.subprocess(
+    env=dict(
+        DD_PROFILING_OUTPUT_PPROF="/tmp/test_asyncio_start_profiler_from_process_after_task_start",
+    ),
+    err=None,
+)
+def test_asyncio_import_and_start_profiler_from_process_after_task_start():
+    import asyncio
+    import os
+    import sys
+    import time
+
+    # Start an asyncio loop BEFORE importing profiler modules
+    # This simulates the bug scenario where a loop exists before profiling is enabled
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+
+    async def my_function():
+        async def background_task_func() -> None:
+            """Background task that runs in the existing loop."""
+            await asyncio.sleep(1.5)
+
+        # Create and start a task in the existing loop
+        background_task = loop.create_task(background_task_func(), name="background")
+        assert background_task is not None
+
+        # Start profiler after loop is already running
+        assert asyncio.get_running_loop() is loop
+
+        # NOW import profiling modules - this should track the existing loop
+        from ddtrace.internal.datadog.profiling import stack_v2
+        from ddtrace.profiling import profiler
+
+        assert stack_v2.is_available, stack_v2.failure_msg
+
+        p = profiler.Profiler()
+        p.start()
+
+        # Run tasks that should be tracked
+        sleep_time = 0.2
+        loop_run_time = 0.75
+
+        async def tracked_task() -> None:
+            start_time = time.time()
+            while time.time() < start_time + loop_run_time:
+                await asyncio.sleep(sleep_time)
+
+        async def main_task():
+            t1 = asyncio.create_task(tracked_task(), name="tracked 1")
+            t2 = asyncio.create_task(tracked_task(), name="tracked 2")
+            await tracked_task()
+            await asyncio.sleep(0.25)
+            return t1, t2
+
+        result = await main_task()
+
+        await background_task
+
+        return tracked_task, background_task_func, p, result
+
+    main_task = loop.create_task(my_function(), name="main")
+    tracked_task_def, background_task_def, p, (t1, t2) = loop.run_until_complete(main_task)
+
+    p.stop()
+
+    t1_name = t1.get_name()
+    t2_name = t2.get_name()
+
+    assert t1_name == "tracked 1"
+    assert t2_name == "tracked 2"
+
+    from tests.profiling.collector import pprof_utils
+
+    output_filename = os.environ["DD_PROFILING_OUTPUT_PPROF"] + "." + str(os.getpid())
+    profile = pprof_utils.parse_newest_profile(output_filename)
+
+    samples = pprof_utils.get_samples_with_label_key(profile, "task name")
+    assert len(samples) > 0, "No task names found - existing loop was not tracked!"
+
+    if sys.version_info >= (3, 11):
+        EXPECTED_FUNCTION_NAME_BACKGROUND = f"{my_function.__name__}.<locals>.{background_task_def.__name__}"
+    else:
+        EXPECTED_FUNCTION_NAME_BACKGROUND = background_task_def.__name__
+    EXPECTED_FILENAME_BACKGROUND = os.path.basename(background_task_def.__code__.co_filename)
+    EXPECTED_LINE_NO_BACKGROUND = background_task_def.__code__.co_firstlineno
+
+    pprof_utils.assert_profile_has_sample(
+        profile,
+        samples,
+        expected_sample=pprof_utils.StackEvent(
+            thread_name="MainThread",
+            task_name="background",
+            locations=[
+                pprof_utils.StackLocation(
+                    function_name=EXPECTED_FUNCTION_NAME_BACKGROUND,
+                    filename=EXPECTED_FILENAME_BACKGROUND,
+                    line_no=EXPECTED_LINE_NO_BACKGROUND,
+                ),
+            ],
+        ),
+    )
+
+    # Verify specific tasks are in the profile
+    if sys.version_info >= (3, 11):
+        EXPECTED_FUNCTION_NAME_TRACKED = f"{my_function.__name__}.<locals>.{tracked_task_def.__name__}"
+    else:
+        EXPECTED_FUNCTION_NAME_TRACKED = tracked_task_def.__name__
+    EXPECTED_FILENAME_TRACKED = os.path.basename(tracked_task_def.__code__.co_filename)
+    EXPECTED_LINE_NO_TRACKED = tracked_task_def.__code__.co_firstlineno + 3
+
+    pprof_utils.assert_profile_has_sample(
+        profile,
+        samples,
+        expected_sample=pprof_utils.StackEvent(
+            thread_name="MainThread",
+            task_name=t1_name,
+            locations=[
+                pprof_utils.StackLocation(
+                    function_name=EXPECTED_FUNCTION_NAME_TRACKED,
+                    filename=EXPECTED_FILENAME_TRACKED,
+                    line_no=EXPECTED_LINE_NO_TRACKED,
+                )
+            ],
+        ),
+    )
diff --git a/tests/profiling_v2/collector/test_threading.py b/tests/profiling/collector/test_threading.py
similarity index 100%
rename from tests/profiling_v2/collector/test_threading.py
rename to tests/profiling/collector/test_threading.py
diff --git a/tests/profiling_v2/collector/test_threading_asyncio.py b/tests/profiling/collector/test_threading_asyncio.py
similarity index 100%
rename from
tests/profiling_v2/collector/test_threading_asyncio.py rename to tests/profiling/collector/test_threading_asyncio.py diff --git a/tests/profiling/conftest.py b/tests/profiling/conftest.py deleted file mode 100644 index 6b04983dd8f..00000000000 --- a/tests/profiling/conftest.py +++ /dev/null @@ -1,11 +0,0 @@ -import os - -import pytest - - -@pytest.fixture(scope="session", autouse=True) -def disable_coverage_for_subprocess(): - try: - del os.environ["COV_CORE_SOURCE"] - except KeyError: - pass diff --git a/tests/profiling/collector/__init__.py b/tests/profiling/exporter/__init__.py similarity index 100% rename from tests/profiling/collector/__init__.py rename to tests/profiling/exporter/__init__.py diff --git a/tests/profiling_v2/exporter/test_ddup.py b/tests/profiling/exporter/test_ddup.py similarity index 100% rename from tests/profiling_v2/exporter/test_ddup.py rename to tests/profiling/exporter/test_ddup.py diff --git a/tests/profiling_v2/gunicorn.conf.py b/tests/profiling/gunicorn.conf.py similarity index 100% rename from tests/profiling_v2/gunicorn.conf.py rename to tests/profiling/gunicorn.conf.py diff --git a/tests/profiling_v2/native_tests b/tests/profiling/native_tests similarity index 100% rename from tests/profiling_v2/native_tests rename to tests/profiling/native_tests diff --git a/tests/profiling/simple_program_gevent.py b/tests/profiling/simple_program_gevent.py index f50fa3aa2e0..1cc85d2c921 100644 --- a/tests/profiling/simple_program_gevent.py +++ b/tests/profiling/simple_program_gevent.py @@ -4,7 +4,7 @@ import ddtrace.profiling.auto # noqa:F401 -import gevent.monkey # noqa:F402 +import gevent.monkey # noqa:F402 gevent.monkey.patch_all() diff --git a/tests/profiling_v2/simple_program_pytorch_gpu.py b/tests/profiling/simple_program_pytorch_gpu.py similarity index 100% rename from tests/profiling_v2/simple_program_pytorch_gpu.py rename to tests/profiling/simple_program_pytorch_gpu.py diff --git a/tests/profiling/suitespec.yml b/tests/profiling/suitespec.yml index 51eb1cfe636..1b0ca074891 100644 --- a/tests/profiling/suitespec.yml +++ b/tests/profiling/suitespec.yml @@ -81,14 +81,14 @@ suites: profile_v2: env: DD_TRACE_AGENT_URL: '' - # `riot list --hash-only profile-v2$ | wc -1` = 19 - parallelism: 16 + # `riot list --hash-only profile-v2 | wc -l` = 26 + parallelism: 26 paths: - '@bootstrap' - '@core' - '@profiling' - tests/profiling/suitespec.yml - - tests/profiling_v2/* + - tests/profiling/* pattern: profile-v2 retry: 2 runner: riot diff --git a/tests/profiling_v2/test_accuracy.py b/tests/profiling/test_accuracy.py similarity index 95% rename from tests/profiling_v2/test_accuracy.py rename to tests/profiling/test_accuracy.py index 2333e429e55..6fd014af181 100644 --- a/tests/profiling_v2/test_accuracy.py +++ b/tests/profiling/test_accuracy.py @@ -68,8 +68,8 @@ def test_accuracy_stack_v2(): from ddtrace.profiling import profiler from tests.profiling.collector import pprof_utils - from tests.profiling_v2.test_accuracy import assert_almost_equal - from tests.profiling_v2.test_accuracy import spend_16 + from tests.profiling.test_accuracy import assert_almost_equal + from tests.profiling.test_accuracy import spend_16 # Set this to 100 so we don't sleep too often and mess with the precision. 
p = profiler.Profiler() diff --git a/tests/profiling_v2/test_code_provenance.py b/tests/profiling/test_code_provenance.py similarity index 100% rename from tests/profiling_v2/test_code_provenance.py rename to tests/profiling/test_code_provenance.py diff --git a/tests/profiling/test_gunicorn.py b/tests/profiling/test_gunicorn.py index a21acaa523f..78297c85e55 100644 --- a/tests/profiling/test_gunicorn.py +++ b/tests/profiling/test_gunicorn.py @@ -4,10 +4,21 @@ import subprocess import sys import time +import urllib.request import pytest -from . import utils +from tests.profiling.collector import pprof_utils + + +# DEV: gunicorn tests are hard to debug, so keeping these print statements for +# future debugging +DEBUG_PRINT = True + + +def debug_print(*args): + if DEBUG_PRINT: + print(*args) # gunicorn is not available on Windows @@ -19,17 +30,27 @@ def _run_gunicorn(*args): cmd = ( - ["ddtrace-run", "gunicorn", "--bind", "127.0.0.1:7643", "--chdir", os.path.dirname(__file__)] + [ + "ddtrace-run", + "gunicorn", + "--bind", + "127.0.0.1:7644", + "--worker-tmp-dir", + "/dev/shm", + "-c", + os.path.dirname(__file__) + "/gunicorn.conf.py", + "--chdir", + os.path.dirname(__file__), + ] + list(args) - + ["gunicorn-app:app"] + + ["tests.profiling.gunicorn-app:app"] ) return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) @pytest.fixture def gunicorn(monkeypatch): - # Do not ignore profiler so we have samples in the output pprof - monkeypatch.setenv("DD_PROFILING_IGNORE_PROFILER", "0") + monkeypatch.setenv("DD_PROFILING_IGNORE_PROFILER", "1") monkeypatch.setenv("DD_PROFILING_ENABLED", "1") yield _run_gunicorn @@ -44,22 +65,75 @@ def _test_gunicorn(gunicorn, tmp_path, monkeypatch, *args): # type: (...) -> None filename = str(tmp_path / "gunicorn.pprof") monkeypatch.setenv("DD_PROFILING_OUTPUT_PPROF", filename) - - proc = gunicorn("-w", "3", *args) - time.sleep(3) - proc.terminate() + monkeypatch.setenv("_DD_PROFILING_STACK_V2_ADAPTIVE_SAMPLING_ENABLED", "0") + + debug_print("Creating gunicorn workers") + # DEV: We only start 1 worker to simplify the test + proc = gunicorn("-w", "1", *args) + # Wait for the workers to start + time.sleep(5) + + if proc.poll() is not None: + pytest.fail("Gunicorn failed to start") + + debug_print("Making request to gunicorn server") + try: + with urllib.request.urlopen("http://127.0.0.1:7644", timeout=5) as f: + status_code = f.getcode() + assert status_code == 200, status_code + response = f.read().decode() + debug_print(response) + except Exception as e: + proc.terminate() + output = proc.stdout.read().decode() + print(output) + pytest.fail("Failed to make request to gunicorn server %s" % e) + finally: + # Need to terminate the process to get the output and release the port + proc.terminate() + + debug_print("Reading gunicorn worker output to get PIDs") output = proc.stdout.read().decode() worker_pids = _get_worker_pids(output) + debug_print("Gunicorn worker PIDs: %s" % worker_pids) - assert len(worker_pids) == 3, output - assert proc.wait() == 0, output - assert "module 'threading' has no attribute '_active'" not in output, output + for line in output.splitlines(): + debug_print(line) - utils.check_pprof_file("%s.%d" % (filename, proc.pid)) - for pid in worker_pids: - utils.check_pprof_file("%s.%d" % (filename, pid)) + assert len(worker_pids) == 1, output + debug_print("Waiting for gunicorn process to terminate") + try: + assert proc.wait(timeout=5) == 0, output + except subprocess.TimeoutExpired: + pytest.fail("Failed to terminate gunicorn 
process: %s" % output)
+    assert "module 'threading' has no attribute '_active'" not in output, output
+    for pid in worker_pids:
+        debug_print("Reading pprof file with prefix %s.%d" % (filename, pid))
+        profile = pprof_utils.parse_newest_profile("%s.%d" % (filename, pid))
+        # This returns a list of samples that have non-zero cpu-time
+        samples = pprof_utils.get_samples_with_value_type(profile, "cpu-time")
+        assert len(samples) > 0
+
+        # DEV: somehow the filename is reported as either __init__.py or gunicorn-app.py
+        # when run on GitLab CI. We need to match either of these two.
+        filename_regex = r"^(?:__init__\.py|gunicorn-app\.py)$"
+
+        expected_location = pprof_utils.StackLocation(function_name="fib", filename=filename_regex, line_no=8)
+
+        pprof_utils.assert_profile_has_sample(
+            profile,
+            samples=samples,
+            # DEV: we expect multiple locations as fibonacci is recursive
+            expected_sample=pprof_utils.StackEvent(locations=[expected_location, expected_location]),
+        )
+
+
+@pytest.mark.skipif(
+    sys.version_info[:2] == (3, 8) and os.environ.get("DD_PROFILE_TEST_GEVENT") == "1",
+    reason="Flaky and fails often on Python 3.8 with DD_PROFILE_TEST_GEVENT=1",
+)
 def test_gunicorn(gunicorn, tmp_path, monkeypatch):
     # type: (...) -> None
     args = ("-k", "gevent") if TESTING_GEVENT else tuple()
diff --git a/tests/profiling/test_main.py b/tests/profiling/test_main.py
index bb08b77e848..343de097662 100644
--- a/tests/profiling/test_main.py
+++ b/tests/profiling/test_main.py
@@ -2,77 +2,145 @@
 import multiprocessing
 import os
 import sys
-from typing import cast
 
 import pytest
 
+from tests.profiling.collector import lock_utils
+from tests.profiling.collector import pprof_utils
 from tests.utils import call_program
 
-from . import utils
-
 
-def test_call_script(monkeypatch):
-    # Set a very short timeout to exit fast
-    monkeypatch.setenv("DD_PROFILING_API_TIMEOUT_MS", "100")
-    monkeypatch.setenv("DD_PROFILING_ENABLED", "1")
+def test_call_script():
+    env = os.environ.copy()
+    env["DD_PROFILING_ENABLED"] = "1"
     stdout, stderr, exitcode, _ = call_program(
-        "ddtrace-run", sys.executable, os.path.join(os.path.dirname(__file__), "simple_program.py")
+        "ddtrace-run", sys.executable, os.path.join(os.path.dirname(__file__), "simple_program.py"), env=env
     )
     if sys.platform == "win32":
         assert exitcode == 0, (stdout, stderr)
     else:
         assert exitcode == 42, (stdout, stderr)
-    hello, pid = list(s.strip() for s in cast(bytes, stdout).decode().strip().split("\n"))
+    hello, pid = list(s.strip() for s in stdout.decode().strip().split("\n"))
     assert hello == "hello world", stdout.decode().strip()
 
 
 @pytest.mark.skipif(not os.getenv("DD_PROFILE_TEST_GEVENT", False), reason="Not testing gevent")
-def test_call_script_gevent(monkeypatch):
-    monkeypatch.setenv("DD_PROFILING_API_TIMEOUT_MS", "100")
+def test_call_script_gevent():
+    if sys.version_info[:2] == (3, 8):
+        pytest.skip("this test is flaky on 3.8 with stack v2")
+    env = os.environ.copy()
+    env["DD_PROFILING_ENABLED"] = "1"
     stdout, stderr, exitcode, pid = call_program(
-        sys.executable, os.path.join(os.path.dirname(__file__), "simple_program_gevent.py")
+        sys.executable, os.path.join(os.path.dirname(__file__), "simple_program_gevent.py"), env=env
     )
     assert exitcode == 0, (stdout, stderr)
 
 
-def test_call_script_pprof_output(tmp_path, monkeypatch):
+def test_call_script_pprof_output(tmp_path):
     """This checks if the pprof output and atexit register work correctly.
 
     The script does not run for one minute, so if the `stop_on_exit` flag is broken, this test will fail.
""" filename = str(tmp_path / "pprof") - monkeypatch.setenv("DD_PROFILING_OUTPUT_PPROF", filename) - monkeypatch.setenv("DD_PROFILING_CAPTURE_PCT", "1") - monkeypatch.setenv("DD_PROFILING_ENABLED", "1") + env = os.environ.copy() + env["DD_PROFILING_OUTPUT_PPROF"] = filename + env["DD_PROFILING_CAPTURE_PCT"] = "1" + env["DD_PROFILING_ENABLED"] = "1" stdout, stderr, exitcode, _ = call_program( - "ddtrace-run", sys.executable, os.path.join(os.path.dirname(__file__), "simple_program.py") + "ddtrace-run", + sys.executable, + os.path.join(os.path.dirname(__file__), "simple_program.py"), + env=env, ) if sys.platform == "win32": assert exitcode == 0, (stdout, stderr) else: assert exitcode == 42, (stdout, stderr) - hello, pid = list(s.strip() for s in cast(bytes, stdout).decode().strip().split("\n")) - utils.check_pprof_file(filename + "." + str(pid)) + _, pid = list(s.strip() for s in stdout.decode().strip().split("\n")) + profile = pprof_utils.parse_newest_profile(filename + "." + str(pid)) + samples = pprof_utils.get_samples_with_value_type(profile, "cpu-time") + assert len(samples) > 0 @pytest.mark.skipif(sys.platform == "win32", reason="fork only available on Unix") -def test_fork(tmp_path, monkeypatch): +def test_fork(tmp_path): filename = str(tmp_path / "pprof") - monkeypatch.setenv("DD_PROFILING_API_TIMEOUT_MS", "100") - monkeypatch.setenv("DD_PROFILING_OUTPUT_PPROF", filename) - monkeypatch.setenv("DD_PROFILING_CAPTURE_PCT", "100") - stdout, _, exitcode, pid = call_program("python", os.path.join(os.path.dirname(__file__), "simple_program_fork.py")) + env = os.environ.copy() + env["DD_PROFILING_OUTPUT_PPROF"] = filename + env["DD_PROFILING_CAPTURE_PCT"] = "100" + stdout, stderr, exitcode, pid = call_program( + "python", os.path.join(os.path.dirname(__file__), "simple_program_fork.py"), env=env + ) assert exitcode == 0 - child_pid = cast(bytes, stdout).decode().strip() - utils.check_pprof_file(filename + "." + str(pid)) - utils.check_pprof_file(filename + "." + str(child_pid), sample_type="lock-release") + child_pid = stdout.decode().strip() + profile = pprof_utils.parse_newest_profile(filename + "." + str(pid)) + parent_expected_acquire_events = [ + pprof_utils.LockAcquireEvent( + caller_name="", + filename="simple_program_fork.py", + linenos=lock_utils.LineNo(create=11, acquire=12, release=28), + lock_name="lock", + ), + ] + parent_expected_release_events = [ + pprof_utils.LockReleaseEvent( + caller_name="", + filename="simple_program_fork.py", + linenos=lock_utils.LineNo(create=11, acquire=12, release=28), + lock_name="lock", + ), + ] + pprof_utils.assert_lock_events( + profile, + expected_acquire_events=parent_expected_acquire_events, + expected_release_events=parent_expected_release_events, + ) + child_profile = pprof_utils.parse_newest_profile(filename + "." + str(child_pid)) + # We expect the child profile to not have lock events from the parent process + # Note that assert_lock_events function only checks that the given events + # exists, and doesn't assert that other events don't exist. 
+    with pytest.raises(AssertionError):
+        pprof_utils.assert_lock_events(
+            child_profile,
+            expected_acquire_events=parent_expected_acquire_events,
+            expected_release_events=parent_expected_release_events,
+        )
+    pprof_utils.assert_lock_events(
+        child_profile,
+        expected_acquire_events=[
+            # After fork(), we clear the samples in child, so we only have one
+            # lock acquire event
+            pprof_utils.LockAcquireEvent(
+                caller_name="<module>",
+                filename="simple_program_fork.py",
+                linenos=lock_utils.LineNo(create=24, acquire=25, release=26),
+                lock_name="lock",
+            ),
+        ],
+        expected_release_events=[
+            pprof_utils.LockReleaseEvent(
+                caller_name="<module>",
+                filename="simple_program_fork.py",
+                linenos=lock_utils.LineNo(create=11, acquire=12, release=21),
+                lock_name="lock",
+            ),
+            pprof_utils.LockReleaseEvent(
+                caller_name="<module>",
+                filename="simple_program_fork.py",
+                linenos=lock_utils.LineNo(create=24, acquire=25, release=26),
+                lock_name="lock",
+            ),
+        ],
+    )
 
 
 @pytest.mark.skipif(sys.platform == "win32", reason="fork only available on Unix")
 @pytest.mark.skipif(not os.getenv("DD_PROFILE_TEST_GEVENT", False), reason="Not testing gevent")
-def test_fork_gevent(monkeypatch):
-    monkeypatch.setenv("DD_PROFILING_API_TIMEOUT_MS", "100")
-    _, _, exitcode, _ = call_program("python", os.path.join(os.path.dirname(__file__), "gevent_fork.py"))
+def test_fork_gevent():
+    env = os.environ.copy()
+    stdout, stderr, exitcode, pid = call_program(
+        "python", os.path.join(os.path.dirname(__file__), "gevent_fork.py"), env=env
+    )
     assert exitcode == 0
 
 
@@ -83,21 +151,27 @@ def test_fork_gevent():
     "method",
     set(methods) - {"forkserver", "fork"},
 )
-def test_multiprocessing(method, tmp_path, monkeypatch):
+def test_multiprocessing(method, tmp_path):
     filename = str(tmp_path / "pprof")
-    monkeypatch.setenv("DD_PROFILING_OUTPUT_PPROF", filename)
-    monkeypatch.setenv("DD_PROFILING_ENABLED", "1")
-    monkeypatch.setenv("DD_PROFILING_CAPTURE_PCT", "1")
+    env = os.environ.copy()
+    env["DD_PROFILING_OUTPUT_PPROF"] = filename
+    env["DD_PROFILING_ENABLED"] = "1"
+    env["DD_PROFILING_CAPTURE_PCT"] = "1"
     stdout, stderr, exitcode, _ = call_program(
         "ddtrace-run",
         sys.executable,
         os.path.join(os.path.dirname(__file__), "_test_multiprocessing.py"),
         method,
+        env=env,
     )
     assert exitcode == 0, (stdout, stderr)
-    pid, child_pid = list(s.strip() for s in cast(bytes, stdout).decode().strip().split("\n"))
-    utils.check_pprof_file(filename + "." + str(pid))
-    utils.check_pprof_file(filename + "." + str(child_pid), sample_type="wall-time")
+    pid, child_pid = list(s.strip() for s in stdout.decode().strip().split("\n"))
+    profile = pprof_utils.parse_newest_profile(filename + "." + str(pid))
+    samples = pprof_utils.get_samples_with_value_type(profile, "cpu-time")
+    assert len(samples) > 0
+    child_profile = pprof_utils.parse_newest_profile(filename + "."
+ str(child_pid)) + child_samples = pprof_utils.get_samples_with_value_type(child_profile, "cpu-time") + assert len(child_samples) > 0 @pytest.mark.subprocess( @@ -114,7 +188,6 @@ def test_memalloc_no_init_error_on_fork(): os.waitpid(pid, 0) -@pytest.mark.skipif(sys.version_info[:2] == (3, 9), reason="This test is flaky on Python 3.9") @pytest.mark.subprocess( ddtrace_run=True, env=dict( diff --git a/tests/profiling/test_profiler.py b/tests/profiling/test_profiler.py index 7bcbb9351c4..90b7b4ac638 100644 --- a/tests/profiling/test_profiler.py +++ b/tests/profiling/test_profiler.py @@ -1,10 +1,12 @@ import logging +import sys import time from unittest import mock import pytest import ddtrace +from ddtrace.internal.compat import PYTHON_VERSION_INFO from ddtrace.profiling import collector from ddtrace.profiling import profiler from ddtrace.profiling import scheduler @@ -144,3 +146,186 @@ def test_profiler_serverless(monkeypatch): p = profiler.Profiler() assert isinstance(p._scheduler, scheduler.ServerlessScheduler) assert p.tags["functionname"] == "foobar" + + +@pytest.mark.skipif(PYTHON_VERSION_INFO < (3, 10), reason="ddtrace under Python 3.9 is deprecated") +@pytest.mark.subprocess() +def test_profiler_ddtrace_deprecation(): + """ + ddtrace interfaces loaded by the profiler can be marked deprecated, and we should update + them when this happens. As reported by https://github.com/DataDog/dd-trace-py/issues/8881 + """ + import warnings + + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + from ddtrace.profiling import _threading # noqa:F401 + from ddtrace.profiling import event # noqa:F401 + from ddtrace.profiling import profiler # noqa:F401 + from ddtrace.profiling import scheduler # noqa:F401 + from ddtrace.profiling.collector import _lock # noqa:F401 + from ddtrace.profiling.collector import _task # noqa:F401 + from ddtrace.profiling.collector import _traceback # noqa:F401 + from ddtrace.profiling.collector import memalloc # noqa:F401 + from ddtrace.profiling.collector import stack # noqa:F401 + + +@pytest.mark.subprocess( + env=dict(DD_PROFILING_ENABLED="true"), + err="Failed to load ddup module (mock failure message), disabling profiling\n", +) +def test_libdd_failure_telemetry_logging(): + """Test that libdd initialization failures log to telemetry. This mimics + one of the two scenarios where profiling can be configured. 
+    1) using ddtrace-run with DD_PROFILING_ENABLED=true
+    2) import ddtrace.profiling.auto
+    """
+
+    from unittest import mock
+
+    with (
+        mock.patch.multiple(
+            "ddtrace.internal.datadog.profiling.ddup",
+            failure_msg="mock failure message",
+            is_available=False,
+        ),
+        mock.patch("ddtrace.internal.telemetry.telemetry_writer.add_log") as mock_add_log,
+    ):
+        from ddtrace.internal.settings.profiling import config  # noqa:F401
+        from ddtrace.internal.telemetry.constants import TELEMETRY_LOG_LEVEL
+
+        mock_add_log.assert_called_once()
+        call_args = mock_add_log.call_args
+        assert call_args[0][0] == TELEMETRY_LOG_LEVEL.ERROR
+        message = call_args[0][1]
+        assert "Failed to load ddup module" in message
+        assert "mock failure message" in message
+
+
+@pytest.mark.subprocess(
+    # We'd like to check the stderr, but it somehow leads to triggering the
+    # upload code path on macOS
+    err=None
+)
+def test_libdd_failure_telemetry_logging_with_auto():
+    from unittest import mock
+
+    with (
+        mock.patch.multiple(
+            "ddtrace.internal.datadog.profiling.ddup",
+            failure_msg="mock failure message",
+            is_available=False,
+        ),
+        mock.patch("ddtrace.internal.telemetry.telemetry_writer.add_log") as mock_add_log,
+    ):
+        from ddtrace.internal.telemetry.constants import TELEMETRY_LOG_LEVEL
+        import ddtrace.profiling.auto  # noqa: F401
+
+        mock_add_log.assert_called_once()
+        call_args = mock_add_log.call_args
+        assert call_args[0][0] == TELEMETRY_LOG_LEVEL.ERROR
+        message = call_args[0][1]
+        assert "Failed to load ddup module" in message
+        assert "mock failure message" in message
+
+
+@pytest.mark.subprocess(
+    env=dict(DD_PROFILING_ENABLED="true"),
+    err="Failed to load stack_v2 module (mock failure message), falling back to v1 stack sampler\n",
+)
+def test_stack_v2_failure_telemetry_logging():
+    # Test that stack_v2 initialization failures log to telemetry. This is
+    # mimicking the behavior of ddtrace-run, where the config is imported to
+    # determine if profiling/stack_v2 is enabled
+
+    from unittest import mock
+
+    with (
+        mock.patch.multiple(
+            "ddtrace.internal.datadog.profiling.stack_v2",
+            failure_msg="mock failure message",
+            is_available=False,
+        ),
+        mock.patch("ddtrace.internal.telemetry.telemetry_writer.add_log") as mock_add_log,
+    ):
+        from ddtrace.internal.settings.profiling import config  # noqa: F401
+        from ddtrace.internal.telemetry.constants import TELEMETRY_LOG_LEVEL
+
+        mock_add_log.assert_called_once()
+        call_args = mock_add_log.call_args
+        assert call_args[0][0] == TELEMETRY_LOG_LEVEL.ERROR
+        message = call_args[0][1]
+        assert "Failed to load stack_v2 module" in message
+        assert "mock failure message" in message
+
+
+@pytest.mark.subprocess(
+    # We'd like to check the stderr, but it somehow leads to triggering the
+    # upload code path on macOS.
+ err=None, +) +def test_stack_v2_failure_telemetry_logging_with_auto(): + from unittest import mock + + with ( + mock.patch.multiple( + "ddtrace.internal.datadog.profiling.stack_v2", + failure_msg="mock failure message", + is_available=False, + ), + mock.patch("ddtrace.internal.telemetry.telemetry_writer.add_log") as mock_add_log, + ): + from ddtrace.internal.telemetry.constants import TELEMETRY_LOG_LEVEL + import ddtrace.profiling.auto # noqa: F401 + + mock_add_log.assert_called_once() + call_args = mock_add_log.call_args + assert call_args[0][0] == TELEMETRY_LOG_LEVEL.ERROR + message = call_args[0][1] + assert "Failed to load stack_v2 module" in message + assert "mock failure message" in message + + +@pytest.mark.skipif(not sys.platform.startswith("linux"), reason="only works on linux") +@pytest.mark.subprocess(err=None) +# For macOS: Could print 'Error uploading' but okay to ignore since we are checking if native_id is set +def test_user_threads_have_native_id(): + from os import getpid + from threading import Thread + from threading import _MainThread # pyright: ignore[reportAttributeAccessIssue] + from threading import current_thread + from time import sleep + + from ddtrace.profiling import profiler + + # DEV: We used to run this test with ddtrace_run=True passed into the + # subprocess decorator, but that caused this to be flaky for Python 3.8.x + # with gevent. When it failed for that specific venv, current_thread() + # returned a DummyThread instead of a _MainThread. + p = profiler.Profiler() + p.start() + + main = current_thread() + assert isinstance(main, _MainThread) + # We expect the current thread to have the same ID as the PID + assert main.native_id == getpid(), (main.native_id, getpid()) + + t = Thread(target=lambda: None) + t.start() + + for _ in range(10): + try: + # The TID should be higher than the PID, but not too high + assert 0 < t.native_id - getpid() < 100, (t.native_id, getpid()) # pyright: ignore[reportOptionalOperand] + except AttributeError: + # The native_id attribute is set by the thread so we might have to + # wait a bit for it to be set. 
+ sleep(0.1) + else: + break + else: + raise AssertionError("Thread.native_id not set") + + t.join() + + p.stop() diff --git a/tests/profiling_v2/test_pytorch.py b/tests/profiling/test_pytorch.py similarity index 100% rename from tests/profiling_v2/test_pytorch.py rename to tests/profiling/test_pytorch.py diff --git a/tests/profiling/test_scheduler.py b/tests/profiling/test_scheduler.py index 0da58e0e922..9d89a51574c 100644 --- a/tests/profiling/test_scheduler.py +++ b/tests/profiling/test_scheduler.py @@ -1,8 +1,7 @@ # -*- encoding: utf-8 -*- import logging import time - -import mock +from unittest import mock from ddtrace.profiling import scheduler @@ -15,6 +14,7 @@ def test_exporter_failure(): def test_thread_name(): s = scheduler.Scheduler() s.start() + assert s._worker is not None assert s._worker.name == "ddtrace.profiling.scheduler:Scheduler" s.stop() @@ -37,7 +37,7 @@ def call_me(): s = scheduler.Scheduler(before_flush=call_me) s.flush() assert caplog.record_tuples == [ - ("ddtrace.profiling.scheduler", logging.ERROR, "Scheduler before_flush hook failed") + (("ddtrace.profiling.scheduler", logging.ERROR, "Scheduler before_flush hook failed")) ] diff --git a/tests/profiling/test_uwsgi.py b/tests/profiling/test_uwsgi.py index 79b20e917a8..c9e7376bc59 100644 --- a/tests/profiling/test_uwsgi.py +++ b/tests/profiling/test_uwsgi.py @@ -1,23 +1,22 @@ -# -*- encoding: utf-8 -*- +from importlib.metadata import version import os import re import signal from subprocess import TimeoutExpired import sys -import tempfile import time import pytest from tests.contrib.uwsgi import run_uwsgi - -from . import utils +from tests.profiling.collector import pprof_utils # uwsgi is not available on Windows if sys.platform == "win32": pytestmark = pytest.mark.skip +TESTING_GEVENT = os.getenv("DD_PROFILE_TEST_GEVENT", False) THREADS_MSG = ( b"ddtrace.internal.uwsgi.uWSGIConfigError: enable-threads option must be set to true, or a positive " b"number of threads must be set" @@ -27,12 +26,22 @@ @pytest.fixture -def uwsgi(monkeypatch): +def uwsgi(monkeypatch, tmp_path): # Do not ignore profiler so we have samples in the output pprof monkeypatch.setenv("DD_PROFILING_IGNORE_PROFILER", "0") # Do not use pytest tmpdir fixtures which generate directories longer than allowed for a socket file name - socket_name = tempfile.mktemp() - cmd = ["uwsgi", "--need-app", "--die-on-term", "--socket", socket_name, "--wsgi-file", uwsgi_app] + socket_name = str(tmp_path / "uwsgi.sock") + import os + + cmd = [ + "uwsgi", + "--need-app", + "--die-on-term", + "--socket", + socket_name, + "--wsgi-file", + uwsgi_app, + ] try: yield run_uwsgi(cmd) @@ -67,10 +76,12 @@ def test_uwsgi_threads_enabled(uwsgi, tmp_path, monkeypatch): proc.terminate() assert proc.wait() == 30 for pid in worker_pids: - utils.check_pprof_file("%s.%d" % (filename, pid)) + profile = pprof_utils.parse_newest_profile("%s.%d" % (filename, pid)) + samples = pprof_utils.get_samples_with_value_type(profile, "wall-time") + assert len(samples) > 0 -def test_uwsgi_threads_processes_no_master(uwsgi, monkeypatch): +def test_uwsgi_threads_processes_no_primary(uwsgi, monkeypatch): proc = uwsgi("--enable-threads", "--processes", "2") stdout, _ = proc.communicate() assert ( @@ -99,7 +110,7 @@ def _get_worker_pids(stdout, num_worker, num_app_started=1): return worker_pids -def test_uwsgi_threads_processes_master(uwsgi, tmp_path, monkeypatch): +def test_uwsgi_threads_processes_primary(uwsgi, tmp_path, monkeypatch): filename = str(tmp_path / "uwsgi.pprof") 
monkeypatch.setenv("DD_PROFILING_OUTPUT_PPROF", filename) proc = uwsgi("--enable-threads", "--master", "--py-call-uwsgi-fork-hooks", "--processes", "2") @@ -109,42 +120,109 @@ def test_uwsgi_threads_processes_master(uwsgi, tmp_path, monkeypatch): proc.terminate() assert proc.wait() == 0 for pid in worker_pids: - utils.check_pprof_file("%s.%d" % (filename, pid)) + profile = pprof_utils.parse_newest_profile("%s.%d" % (filename, pid)) + samples = pprof_utils.get_samples_with_value_type(profile, "wall-time") + assert len(samples) > 0 -def test_uwsgi_threads_processes_master_lazy_apps(uwsgi, tmp_path, monkeypatch): +def test_uwsgi_threads_processes_primary_lazy_apps(uwsgi, tmp_path, monkeypatch): filename = str(tmp_path / "uwsgi.pprof") monkeypatch.setenv("DD_PROFILING_OUTPUT_PPROF", filename) - proc = uwsgi("--enable-threads", "--master", "--processes", "2", "--lazy-apps") + monkeypatch.setenv("DD_PROFILING_UPLOAD_INTERVAL", "1") + # For uwsgi<2.0.30, --skip-atexit is required to avoid crashes when + # the child process exits. + proc = uwsgi("--enable-threads", "--master", "--processes", "2", "--lazy-apps", "--skip-atexit") worker_pids = _get_worker_pids(proc.stdout, 2, 2) - # Give some time to child to actually startup + # Give some time to child to actually startup and output a profile time.sleep(3) proc.terminate() assert proc.wait() == 0 for pid in worker_pids: - utils.check_pprof_file("%s.%d" % (filename, pid)) + profile = pprof_utils.parse_newest_profile("%s.%d" % (filename, pid)) + samples = pprof_utils.get_samples_with_value_type(profile, "wall-time") + assert len(samples) > 0 -def test_uwsgi_threads_processes_no_master_lazy_apps(uwsgi, tmp_path, monkeypatch): +def test_uwsgi_threads_processes_no_primary_lazy_apps(uwsgi, tmp_path, monkeypatch): filename = str(tmp_path / "uwsgi.pprof") monkeypatch.setenv("DD_PROFILING_OUTPUT_PPROF", filename) - proc = uwsgi("--enable-threads", "--processes", "2", "--lazy-apps") + monkeypatch.setenv("DD_PROFILING_UPLOAD_INTERVAL", "1") + # For uwsgi<2.0.30, --skip-atexit is required to avoid crashes when + # the child process exits. + proc = uwsgi("--enable-threads", "--processes", "2", "--lazy-apps", "--skip-atexit") worker_pids = _get_worker_pids(proc.stdout, 2, 2) - # Give some time to child to actually startup + assert len(worker_pids) == 2 + + # Give some time to child to actually startup and output a profile time.sleep(3) - # The processes are started without a master/parent so killing one does not kill the other: - # Kill them all and wait until they die. 
+ + # Kill master process + parent_pid: int = worker_pids[0] + os.kill(parent_pid, signal.SIGTERM) + + # Wait for master to exit + res_pid, res_status = os.waitpid(parent_pid, 0) + print("") + print(f"INFO: Master process {parent_pid} exited with status {res_status} and pid {res_pid}") + + # Attempt to kill worker proc once + worker_pid: int = worker_pids[1] + print(f"DEBUG: Checking worker {worker_pid} status after master exit:") + try: + os.kill(worker_pid, 0) + print(f"WARNING: Worker {worker_pid} is a zombie (will be cleaned up by init).") + + os.kill(worker_pid, signal.SIGKILL) + print(f"WARNING: Worker {worker_pid} could not be killed with SIGKILL (will be cleaned up by init).") + except OSError: + print(f"INFO: Worker {worker_pid} was successfully killed.") + for pid in worker_pids: - os.kill(pid, signal.SIGTERM) - # The first worker is our child, we can wait for it "normally" - os.waitpid(worker_pids[0], 0) - # The other ones are grandchildren, we can't wait for it with `waitpid` - for pid in worker_pids[1:]: - # Wait for the uwsgi workers to all die - while True: - try: - os.kill(pid, 0) - except OSError: - break + profile = pprof_utils.parse_newest_profile("%s.%d" % (filename, pid)) + samples = pprof_utils.get_samples_with_value_type(profile, "wall-time") + assert len(samples) > 0 + + +@pytest.mark.parametrize("lazy_flag", ["--lazy-apps", "--lazy"]) +@pytest.mark.skipif( + tuple(int(x) for x in version("uwsgi").split(".")) >= (2, 0, 30), + reason="uwsgi>=2.0.30 does not require --skip-atexit", +) +def test_uwsgi_require_skip_atexit_when_lazy_with_master(uwsgi, lazy_flag): + expected_warning = b"ddtrace.internal.uwsgi.uWSGIConfigDeprecationWarning: skip-atexit option must be set" + + proc = uwsgi("--enable-threads", "--master", "--processes", "2", lazy_flag) + time.sleep(1) + proc.terminate() + stdout, _ = proc.communicate() + assert expected_warning in stdout + + +@pytest.mark.parametrize("lazy_flag", ["--lazy-apps", "--lazy"]) +@pytest.mark.skipif( + tuple(int(x) for x in version("uwsgi").split(".")) >= (2, 0, 30), + reason="uwsgi>=2.0.30 does not require --skip-atexit", +) +def test_uwsgi_require_skip_atexit_when_lazy_without_master(uwsgi, lazy_flag): + expected_warning = b"ddtrace.internal.uwsgi.uWSGIConfigDeprecationWarning: skip-atexit option must be set" + num_workers = 2 + proc = uwsgi("--enable-threads", "--processes", str(num_workers), lazy_flag) + + worker_pids = [] + logged_warning = 0 + while True: + line = proc.stdout.readline() + if line == b"": + break + if expected_warning in line: + logged_warning += 1 + else: + m = re.match(r"^spawned uWSGI worker \d+ .*\(pid: (\d+),", line.decode()) + if m: + worker_pids.append(int(m.group(1))) + + if logged_warning == num_workers: + break + for pid in worker_pids: - utils.check_pprof_file("%s.%d" % (filename, pid)) + os.kill(pid, signal.SIGTERM) diff --git a/tests/profiling/utils.py b/tests/profiling/utils.py deleted file mode 100644 index f9a74ea1943..00000000000 --- a/tests/profiling/utils.py +++ /dev/null @@ -1,11 +0,0 @@ -from tests.profiling.collector import pprof_utils - - -def check_pprof_file( - filename, # type: str - sample_type="cpu-samples", -): - profile = pprof_utils.parse_newest_profile(filename) - - samples = pprof_utils.get_samples_with_value_type(profile, sample_type) - assert len(samples) >= 1 diff --git a/tests/profiling_v2/__init__.py b/tests/profiling_v2/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/profiling_v2/collector/conftest.py 
b/tests/profiling_v2/collector/conftest.py deleted file mode 100644 index 7dc1d816091..00000000000 --- a/tests/profiling_v2/collector/conftest.py +++ /dev/null @@ -1,8 +0,0 @@ -import pytest - -import ddtrace - - -@pytest.fixture -def tracer(): - return ddtrace.trace.tracer diff --git a/tests/profiling_v2/collector/test_memalloc.py b/tests/profiling_v2/collector/test_memalloc.py deleted file mode 100644 index 1d670aa66d6..00000000000 --- a/tests/profiling_v2/collector/test_memalloc.py +++ /dev/null @@ -1,799 +0,0 @@ -import inspect -import os -import sys -import threading - -import pytest - -from ddtrace.internal.datadog.profiling import ddup -from ddtrace.profiling.collector import memalloc -from tests.profiling.collector import pprof_utils - - -PY_313_OR_ABOVE = sys.version_info[:2] >= (3, 13) - - -def _allocate_1k(): - return [object() for _ in range(1000)] - - -_ALLOC_LINE_NUMBER = _allocate_1k.__code__.co_firstlineno + 1 - - -# This test is marked as subprocess as it changes default heap sample size -@pytest.mark.subprocess( - env=dict(DD_PROFILING_HEAP_SAMPLE_SIZE="1024", DD_PROFILING_OUTPUT_PPROF="/tmp/test_heap_samples_collected") -) -def test_heap_samples_collected(): - import os - - from ddtrace.profiling import Profiler - from tests.profiling.collector import pprof_utils - from tests.profiling_v2.collector.test_memalloc import _allocate_1k - - # Test for https://github.com/DataDog/dd-trace-py/issues/11069 - pprof_prefix = os.environ["DD_PROFILING_OUTPUT_PPROF"] - output_filename = pprof_prefix + "." + str(os.getpid()) - - p = Profiler() - p.start() - x = _allocate_1k() # noqa: F841 - p.stop() - - profile = pprof_utils.parse_newest_profile(output_filename) - samples = pprof_utils.get_samples_with_value_type(profile, "heap-space") - assert len(samples) > 0 - - -def test_memory_collector(tmp_path): - test_name = "test_memory_collector" - pprof_prefix = str(tmp_path / test_name) - output_filename = pprof_prefix + "." + str(os.getpid()) - - ddup.config( - service=test_name, - version="test", - env="test", - output_filename=pprof_prefix, - ) - ddup.start() - - mc = memalloc.MemoryCollector(heap_sample_size=256) - with mc: - _allocate_1k() - mc.snapshot() - - ddup.upload() - - profile = pprof_utils.parse_newest_profile(output_filename) - # Gets samples with alloc-space > 0 - samples = pprof_utils.get_samples_with_value_type(profile, "alloc-space") - - assert len(samples) > 0 - - alloc_samples_idx = pprof_utils.get_sample_type_index(profile, "alloc-samples") - for sample in samples: - # We also want to check 'alloc-samples' is > 0. - assert sample.value[alloc_samples_idx] > 0 - - # We also want to assert that there's a sample that's coming from _allocate_1k() - # And also assert that it's actually coming from _allocate_1k() - pprof_utils.assert_profile_has_sample( - profile, - samples, - expected_sample=pprof_utils.StackEvent( - thread_name="MainThread", - thread_id=threading.main_thread().ident, - locations=[ - pprof_utils.StackLocation( - function_name="_allocate_1k", filename="test_memalloc.py", line_no=_ALLOC_LINE_NUMBER - ) - ], - ), - ) - - -def test_memory_collector_ignore_profiler(tmp_path): - test_name = "test_memory_collector_ignore_profiler" - pprof_prefix = str(tmp_path / test_name) - output_filename = pprof_prefix + "." 
+ str(os.getpid()) - - ddup.config( - service=test_name, - version="test", - env="test", - output_filename=pprof_prefix, - ) - ddup.start() - - mc = memalloc.MemoryCollector(ignore_profiler=True) - quit_thread = threading.Event() - - with mc: - - def alloc(): - _allocate_1k() - quit_thread.wait() - - alloc_thread = threading.Thread(name="allocator", target=alloc) - alloc_thread._ddtrace_profiling_ignore = True - alloc_thread.start() - - mc.snapshot() - - # We need to wait for the data collection to happen so it gets the `_ddtrace_profiling_ignore` Thread attribute from - # the global thread list. - quit_thread.set() - alloc_thread.join() - - ddup.upload() - - try: - pprof_utils.parse_newest_profile(output_filename) - except AssertionError as e: - assert "No samples found" in str(e) - - -@pytest.mark.subprocess( - env=dict(DD_PROFILING_HEAP_SAMPLE_SIZE="8", DD_PROFILING_OUTPUT_PPROF="/tmp/test_heap_profiler_large_heap_overhead") -) -def test_heap_profiler_large_heap_overhead(): - # TODO(nick): this test case used to crash due to integer arithmetic bugs. - # Now it doesn't crash, but it takes far too long to run to be useful in CI. - # Un-skip this test if/when we improve the worst-case performance of the - # heap profiler for large heaps - from ddtrace.profiling import Profiler - from tests.profiling_v2.collector.test_memalloc import one - - p = Profiler() - p.start() - - count = 100_000 - thing_size = 32 - - junk = [] - for i in range(count): - b1 = one(thing_size) - b2 = one(2 * thing_size) - b3 = one(3 * thing_size) - b4 = one(4 * thing_size) - t = (b1, b2, b3, b4) - junk.append(t) - - del junk - - p.stop() - - -# one, two, three, and four exist to give us distinct things -# we can find in the profile without depending on something -# like the line number at which an allocation happens -# Python 3.13 changed bytearray to use an allocation domain that we don't -# currently profile, so we use None instead of bytearray to test. 
-def one(size):
-    return (None,) * size if PY_313_OR_ABOVE else bytearray(size)
-
-
-def two(size):
-    return (None,) * size if PY_313_OR_ABOVE else bytearray(size)
-
-
-def three(size):
-    return (None,) * size if PY_313_OR_ABOVE else bytearray(size)
-
-
-def four(size):
-    return (None,) * size if PY_313_OR_ABOVE else bytearray(size)
-
-
-def _create_allocation(size):
-    return (None,) * size if PY_313_OR_ABOVE else bytearray(size)
-
-
-class HeapInfo:
-    def __init__(self, count, size):
-        self.count = count
-        self.size = size
-
-
-def get_heap_info(heap, funcs):
-    got = {}
-    for event in heap:
-        (frames, _), in_use_size, alloc_size, count = event
-
-        in_use = in_use_size > 0
-        size = in_use_size if in_use_size > 0 else alloc_size
-
-        if not in_use:
-            continue
-        func = frames[0].function_name
-        if func in funcs:
-            v = got.get(func, HeapInfo(0, 0))
-            v.count += 1
-            v.size += size
-            got[func] = v
-    return got
-
-
-def has_function_in_traceback(frames, function_name):
-    return any(frame.function_name == function_name for frame in frames)
-
-
-def get_tracemalloc_stats_per_func(stats, funcs):
-    source_to_func = {}
-
-    for f in funcs:
-        file = inspect.getsourcefile(f)
-        line = inspect.getsourcelines(f)[1] + 1
-        source_to_func[str(file) + str(line)] = f.__name__
-
-    actual_sizes = {}
-    actual_counts = {}
-    for stat in stats:
-        f = stat.traceback[0]
-        key = f.filename + str(f.lineno)
-        if key in source_to_func:
-            func_name = source_to_func[key]
-            actual_sizes[func_name] = stat.size
-            actual_counts[func_name] = stat.count
-    return actual_sizes, actual_counts
-
-
-# TODO: higher sampling intervals have a lot more variance and are flaky
-# but would be nice to test since our default is 1MiB
-@pytest.mark.parametrize("sample_interval", (8, 512, 1024))
-def test_heap_profiler_sampling_accuracy(sample_interval):
-    # tracemalloc lets us get ground truth on how many allocations there were
-    import tracemalloc
-
-    # TODO(nick): use Profiler instead of _memalloc
-    from ddtrace.profiling.collector import _memalloc
-
-    # We seed the RNG to reduce flakiness. This doesn't actually diminish the
-    # quality of the test much. A broken sampling implementation is unlikely to
-    # pass for an arbitrary seed.
-    old = os.environ.get("_DD_MEMALLOC_DEBUG_RNG_SEED")
-    os.environ["_DD_MEMALLOC_DEBUG_RNG_SEED"] = "42"
-    _memalloc.start(32, sample_interval)
-    # Put the env var back in the state we found it
-    if old is not None:
-        os.environ["_DD_MEMALLOC_DEBUG_RNG_SEED"] = old
-    else:
-        del os.environ["_DD_MEMALLOC_DEBUG_RNG_SEED"]
-
-    tracemalloc.start()
-
-    junk = []
-    for i in range(1000):
-        size = 256
-        junk.append(one(size))
-        junk.append(two(2 * size))
-        junk.append(three(3 * size))
-        junk.append(four(4 * size))
-
-    # TODO(nick): randomly remove things from junk to see if the profile is
-    # still accurate
-
-    # Stop tracemalloc before collecting the heap sample, since tracemalloc
-    # is _really_ slow when the _memalloc.heap() call does lots of allocs for
-    # lower sample intervals (i.e. more sampled allocations)
-    stats = tracemalloc.take_snapshot().statistics("traceback")
-    tracemalloc.stop()
-
-    heap = _memalloc.heap()
-    # Important: stop _memalloc _after_ tracemalloc. Need to remove allocator
-    # hooks in LIFO order.
-    _memalloc.stop()
-
-    actual_sizes, _ = get_tracemalloc_stats_per_func(stats, (one, two, three, four))
-    actual_total = sum(actual_sizes.values())
-
-    del junk
-
-    sizes = get_heap_info(heap, {"one", "two", "three", "four"})
-
-    total = sum(v.size for v in sizes.values())
-    print(f"observed total: {total} actual total: {actual_total} error: {abs(total - actual_total) / actual_total}")
-    # 20% error in actual size feels pretty generous
-    # TODO(nick): justify in terms of variance of sampling?
-    assert abs(1 - total / actual_total) <= 0.20
-
-    print("func\tcount\tsize\tactual\trel\tactual\tdiff")
-    for func in ("one", "two", "three", "four"):
-        got = sizes[func]
-        actual_size = actual_sizes[func]
-
-        # Relative portion of the bytes in the profile for this function
-        # out of the functions we're interested in
-        rel = got.size / total
-        actual_rel = actual_size / actual_total
-
-        print(
-            f"{func}\t{got.count}\t{got.size}\t{actual_size}\t{rel:.3f}\t{actual_rel:.3f}\t{abs(rel - actual_rel):.3f}"
-        )
-
-        # Assert that the reported portion of this function in the profile is
-        # pretty close to the actual portion. So, if it's actually ~20% of the
-        # profile then we'd accept anything between 10% and 30%, which is
-        # probably too generous for low sampling intervals but at least won't be
-        # flaky.
-        assert abs(rel - actual_rel) < 0.10
-
-
-@pytest.mark.skip(reason="too slow, indeterministic")
-@pytest.mark.subprocess(
-    env=dict(
-        # Turn off other profilers so that we're just testing memalloc
-        DD_PROFILING_STACK_ENABLED="false",
-        DD_PROFILING_LOCK_ENABLED="false",
-        # Upload a lot, since rotating out memalloc profiler state can race with profiling
-        DD_PROFILING_UPLOAD_INTERVAL="1",
-    ),
-)
-def test_memealloc_data_race_regression():
-    import gc
-    import threading
-    import time
-
-    from ddtrace.profiling import Profiler
-
-    gc.enable()
-    # This threshold controls when garbage collection is triggered. The
-    # threshold is on the count of live allocations, which is checked when doing
-    # a new allocation. This test is ultimately trying to get the allocation of
-    # frame objects during the memory profiler's traceback function to trigger
-    # garbage collection. We want a lower threshold to improve the odds that
-    # this happens.
-    gc.set_threshold(100)
-
-    class Thing:
-        def __init__(self):
-            # Self reference so this gets deallocated in GC vs via refcount
-            self.ref = self
-
-        def __del__(self):
-            # Force GIL yield, so if/when memalloc triggers GC, this is
-            # deallocated, releasing GIL while memalloc is sampling and allowing
-            # something else to run and possibly modify memalloc's internal
-            # state concurrently
-            time.sleep(0)
-
-    def do_alloc():
-        def f():
-            return Thing()
-
-        return f
-
-    def lotsa_allocs(ev):
-        while not ev.is_set():
-            f = do_alloc()
-            f()
-            time.sleep(0.01)
-
-    p = Profiler()
-    p.start()
-
-    threads = []
-    ev = threading.Event()
-    for i in range(4):
-        t = threading.Thread(target=lotsa_allocs, args=(ev,))
-        t.start()
-        threads.append(t)
-
-    # Arbitrary sleep. This typically crashes in about a minute.
-    # But for local development, either let it run way longer or
-    # figure out sanitizer instrumentation
-    time.sleep(120)
-
-    p.stop()
-
-    ev.set()
-    for t in threads:
-        t.join()
-
-
-@pytest.mark.parametrize("sample_interval", (256, 512, 1024))
-def test_memory_collector_allocation_accuracy_with_tracemalloc(sample_interval):
-    import tracemalloc
-
-    old = os.environ.get("_DD_MEMALLOC_DEBUG_RNG_SEED")
-    os.environ["_DD_MEMALLOC_DEBUG_RNG_SEED"] = "42"
-
-    mc = memalloc.MemoryCollector(heap_sample_size=sample_interval)
-
-    try:
-        with mc:
-            tracemalloc.start()
-
-            junk = []
-            for i in range(1000):
-                size = 256
-                junk.append(one(size))
-                junk.append(two(2 * size))
-                junk.append(three(3 * size))
-                junk.append(four(4 * size))
-
-            stats = tracemalloc.take_snapshot().statistics("traceback")
-            tracemalloc.stop()
-
-            del junk
-
-            samples = mc.test_snapshot()
-
-    finally:
-        if old is not None:
-            os.environ["_DD_MEMALLOC_DEBUG_RNG_SEED"] = old
-        else:
-            if "_DD_MEMALLOC_DEBUG_RNG_SEED" in os.environ:
-                del os.environ["_DD_MEMALLOC_DEBUG_RNG_SEED"]
-
-    allocation_samples = [s for s in samples if s.in_use_size == 0]
-    heap_samples = [s for s in samples if s.in_use_size > 0]
-
-    print(f"Total samples: {len(samples)}")
-    print(f"Allocation samples (in_use_size=0): {len(allocation_samples)}")
-    print(f"Heap samples (in_use_size>0): {len(heap_samples)}")
-
-    assert len(allocation_samples) > 0, "Should have captured allocation samples after deletion"
-
-    total_allocation_count = 0
-    for sample in allocation_samples:
-        assert sample.size > 0, f"Invalid allocation sample size: {sample.size}"
-        assert sample.count > 0, f"Invalid allocation sample count: {sample.count}"
-        assert sample.in_use_size == 0, f"Allocation sample should have in_use_size=0, got: {sample.in_use_size}"
-        assert sample.in_use_size >= 0, f"Invalid in_use_size: {sample.in_use_size}"
-        assert sample.alloc_size >= 0, f"Invalid alloc_size: {sample.alloc_size}"
-        total_allocation_count += sample.count
-
-    print(f"Total allocation count: {total_allocation_count}")
-    assert total_allocation_count >= 1, "Should have captured at least 1 allocation sample"
-
-    actual_sizes, actual_counts = get_tracemalloc_stats_per_func(stats, (one, two, three, four))
-    actual_total = sum(actual_sizes.values())
-    actual_count_total = sum(actual_counts.values())
-
-    def get_allocation_info(samples, funcs):
-        got = {}
-        for sample in samples:
-            if sample.in_use_size > 0:
-                continue
-
-            for frame in sample.frames:
-                func = frame.function_name
-                if func in funcs:
-                    v = got.get(func, HeapInfo(0, 0))
-                    v.count += sample.count
-                    v.size += sample.alloc_size
-                    got[func] = v
-                    break
-        return got
-
-    sizes = get_allocation_info(samples, {"one", "two", "three", "four"})
-
-    total = sum(v.size for v in sizes.values())
-    total_count = sum(v.count for v in sizes.values())
-
-    print(f"observed total: {total} actual total: {actual_total} error: {abs(total - actual_total) / actual_total}")
-    assert abs(1 - total / actual_total) <= 0.20
-
-    count_error = abs(total_count - actual_count_total) / actual_count_total
-    print(f"observed count total: {total_count} actual count total: {actual_count_total} error: {count_error}")
-    # Commenting out the total count assertions because we still have more work to do on this.
-    # Our reported counts differed from the actual count by more than we expected, while the reported sizes
-    # are accurate. Our counts seem to be consistently lower than expected for the sample intervals we're testing.
- # We'll need to double-check our count scaling before making assertions about the actual values - # assert abs(1 - total_count / actual_count_total) <= 0.30 - - print("func\tcount\tsize\tactual_size\tactual_count\trel_size\tactual_rel_size\trel_count\tactual_rel_count") - for func in ("one", "two", "three", "four"): - got = sizes[func] - actual_size = actual_sizes[func] - actual_count = actual_counts[func] - - rel_size = got.size / total - actual_rel_size = actual_size / actual_total - - rel_count = got.count / total_count - actual_rel_count = actual_count / actual_count_total - - print( - f"{func}\t{got.count}\t{got.size}\t{actual_size}\t{actual_count}\t{rel_size:.3f}\t{actual_rel_size:.3f}\t{rel_count:.3f}\t{actual_rel_count:.3f}" - ) - - assert abs(rel_size - actual_rel_size) < 0.10 - assert abs(rel_count - actual_rel_count) < 0.15 - - print(f"Successfully validated allocation sampling accuracy for sample_interval={sample_interval}") - print(f"Captured {len(allocation_samples)} allocation samples representing {total_allocation_count} allocations") - - -def test_memory_collector_allocation_tracking_across_snapshots(): - mc = memalloc.MemoryCollector(heap_sample_size=64) - - with mc: - data_to_free = [] - for i in range(10): - data_to_free.append(one(256)) - - data_to_keep = [] - for i in range(10): - data_to_keep.append(two(512)) - - del data_to_free - - samples = mc.test_snapshot() - - assert all(sample.alloc_size > 0 for sample in samples), ( - "Initial snapshot should have alloc_size>0 (new allocations)" - ) - - freed_samples = [s for s in samples if s.in_use_size == 0] - live_samples = [s for s in samples if s.in_use_size > 0] - - assert len(freed_samples) > 0, "Should have some freed samples after deletion" - - assert len(live_samples) > 0, "Should have some live samples" - - for sample in samples: - assert sample.size > 0, f"Invalid size: {sample.size}" - assert sample.count > 0, f"Invalid count: {sample.count}" - assert sample.in_use_size >= 0, f"Invalid in_use_size: {sample.in_use_size}" - assert sample.alloc_size >= 0, f"Invalid alloc_size: {sample.alloc_size}" - - one_freed_samples = [sample for sample in samples if has_function_in_traceback(sample.frames, "one")] - - assert len(one_freed_samples) > 0, "Should have freed samples from function 'one'" - assert all(sample.in_use_size == 0 and sample.alloc_size > 0 for sample in one_freed_samples) - - two_live_samples = [sample for sample in samples if has_function_in_traceback(sample.frames, "two")] - - assert len(two_live_samples) > 0, "Should have live samples from function 'two'" - assert all(sample.in_use_size > 0 and sample.alloc_size > 0 for sample in two_live_samples) - - del data_to_keep - - -def test_memory_collector_python_interface_with_allocation_tracking(): - mc = memalloc.MemoryCollector(heap_sample_size=128) - - with mc: - first_batch = [] - for i in range(20): - first_batch.append(one(256)) - - # We're taking a snapshot here to ensure that in the next snapshot, we don't see any "one" allocations - mc.test_snapshot() - - second_batch = [] - for i in range(15): - second_batch.append(two(512)) - - del first_batch - - final_samples = mc.test_snapshot() - - assert len(final_samples) >= 0, "Final snapshot should be valid" - - for sample in final_samples: - assert sample.size > 0, f"Size should be positive int, got {sample.size}" - assert sample.count > 0, f"Count should be positive int, got {sample.count}" - assert sample.in_use_size >= 0, f"in_use_size should be non-negative int, got {sample.in_use_size}" - assert 
sample.alloc_size >= 0, f"alloc_size should be non-negative int, got {sample.alloc_size}" - - one_samples_in_final = [sample for sample in final_samples if has_function_in_traceback(sample.frames, "one")] - - assert len(one_samples_in_final) == 0, ( - f"Should have no samples with 'one' in traceback in final_samples, got {len(one_samples_in_final)}" - ) - - batch_two_live_samples = [ - sample - for sample in final_samples - if has_function_in_traceback(sample.frames, "two") and sample.in_use_size > 0 - ] - - assert len(batch_two_live_samples) > 0, ( - f"Should have live samples from batch two, got {len(batch_two_live_samples)}" - ) - assert all(sample.in_use_size > 0 and sample.alloc_size > 0 for sample in batch_two_live_samples) - - del second_batch - - -def test_memory_collector_python_interface_with_allocation_tracking_no_deletion(): - mc = memalloc.MemoryCollector(heap_sample_size=128) - - with mc: - initial_samples = mc.test_snapshot() - initial_count = len(initial_samples) - - first_batch = [] - for i in range(20): - first_batch.append(one(256)) - - after_first_batch = mc.test_snapshot() - - second_batch = [] - for i in range(15): - second_batch.append(two(512)) - - final_samples = mc.test_snapshot() - - assert len(after_first_batch) >= initial_count, "Should have at least as many samples after first batch" - assert len(final_samples) >= 0, "Final snapshot should be valid" - - for samples in [initial_samples, after_first_batch, final_samples]: - for sample in samples: - assert sample.size > 0, f"Size should be positive int, got {sample.size}" - assert sample.count > 0, f"Count should be positive int, got {sample.count}" - assert sample.in_use_size >= 0, f"in_use_size should be non-negative int, got {sample.in_use_size}" - assert sample.alloc_size >= 0, f"alloc_size should be non-negative int, got {sample.alloc_size}" - - batch_one_live_samples = [ - sample - for sample in final_samples - if has_function_in_traceback(sample.frames, "one") and sample.in_use_size > 0 - ] - - batch_two_live_samples = [ - sample - for sample in final_samples - if has_function_in_traceback(sample.frames, "two") and sample.in_use_size > 0 - ] - - assert len(batch_one_live_samples) > 0, ( - f"Should have live samples from batch one, got {len(batch_one_live_samples)}" - ) - assert len(batch_two_live_samples) > 0, ( - f"Should have live samples from batch two, got {len(batch_two_live_samples)}" - ) - - assert all(sample.in_use_size > 0 and sample.alloc_size == 0 for sample in batch_one_live_samples) - assert all(sample.in_use_size > 0 and sample.alloc_size > 0 for sample in batch_two_live_samples) - - del first_batch - del second_batch - - -def test_memory_collector_exception_handling(): - mc = memalloc.MemoryCollector(heap_sample_size=256) - - with pytest.raises(ValueError): - with mc: - _allocate_1k() - samples = mc.test_snapshot() - assert isinstance(samples, tuple) - raise ValueError("Test exception") - - with mc: - _allocate_1k() - samples = mc.test_snapshot() - assert isinstance(samples, tuple) - - -def test_memory_collector_allocation_during_shutdown(): - """Test that verifies that when _memalloc.stop() is called while allocations are still - happening in another thread, the shutdown process completes without deadlocks or crashes. 
- """ - import time - - from ddtrace.profiling.collector import _memalloc - - _memalloc.start(32, 512) - - shutdown_event = threading.Event() - allocation_thread = None - - def allocate_continuously(): - while not shutdown_event.is_set(): - data = [0] * 100 - del data - time.sleep(0.001) - - try: - allocation_thread = threading.Thread(target=allocate_continuously) - allocation_thread.start() - - time.sleep(0.1) - - _memalloc.stop() - - finally: - shutdown_event.set() - if allocation_thread: - allocation_thread.join(timeout=1) - - -def test_memory_collector_buffer_pool_exhaustion(): - """Test that the memory collector handles buffer pool exhaustion. - This test creates multiple threads that simultaneously allocate with very deep - stack traces, which could potentially exhaust internal buffer pools. - """ - mc = memalloc.MemoryCollector(heap_sample_size=64) - - with mc: - threads = [] - barrier = threading.Barrier(10) - - def allocate_with_traceback(): - barrier.wait() - - def deep_alloc(depth): - if depth == 0: - return _create_allocation(100) - return deep_alloc(depth - 1) - - data = deep_alloc(50) - del data - - for i in range(10): - t = threading.Thread(target=allocate_with_traceback) - threads.append(t) - t.start() - - for t in threads: - t.join() - - samples = mc.test_snapshot() - - deep_alloc_count = 0 - max_stack_depth = 0 - - for sample in samples: - assert sample.frames is not None, "Buffer pool test: All samples should have stack frames" - stack_depth = len(sample.frames) - max_stack_depth = max(max_stack_depth, stack_depth) - - for frame in sample.frames: - if frame.function_name == "deep_alloc": - deep_alloc_count += 1 - break - - assert deep_alloc_count >= 10, ( - f"Buffer pool test: Expected many allocations from concurrent threads, got {deep_alloc_count}" - ) - - assert max_stack_depth >= 50, ( - f"Buffer pool test: Stack traces should be preserved even under stress (expecting at least 50 frames), " - f"but max depth was only {max_stack_depth}" - ) - - -def test_memory_collector_thread_lifecycle(): - """Test that continuously creates and destroys threads while they perform allocations, - verifying that the collector can track allocations across changing thread contexts. 
- """ - mc = memalloc.MemoryCollector(heap_sample_size=8) - - with mc: - threads = [] - - def worker(): - for i in range(10): - data = [i] * 100 - del data - - for i in range(20): - t = threading.Thread(target=worker) - t.start() - threads.append(t) - - if i > 5: - old_thread = threads.pop(0) - old_thread.join() - - for t in threads: - t.join() - - samples = mc.test_snapshot() - - worker_samples = 0 - for sample in samples: - for frame in sample.frames: - if frame.function_name == "worker": - worker_samples += 1 - break - - assert worker_samples > 0, ( - "Thread lifecycle test: Should capture allocations even as threads are created/destroyed" - ) diff --git a/tests/profiling_v2/collector/test_stack.py b/tests/profiling_v2/collector/test_stack.py deleted file mode 100644 index 20baf1eb265..00000000000 --- a/tests/profiling_v2/collector/test_stack.py +++ /dev/null @@ -1,573 +0,0 @@ -import _thread -import os -import sys -import threading -import time -from unittest.mock import patch -import uuid - -import pytest - -from ddtrace import ext -from ddtrace.internal.datadog.profiling import ddup -from ddtrace.profiling.collector import stack -from tests.profiling.collector import pprof_utils -from tests.profiling.collector import test_collector - - -# Python 3.11.9 is not compatible with gevent, https://github.com/gevent/gevent/issues/2040 -# https://github.com/python/cpython/issues/117983 -# The fix was not backported to 3.11. The fix was first released in 3.12.5 for -# Python 3.12. Tested with Python 3.11.8 and 3.12.5 to confirm the issue. -TESTING_GEVENT = os.getenv("DD_PROFILE_TEST_GEVENT", False) and ( - sys.version_info < (3, 11, 9) or sys.version_info >= (3, 12, 5) -) - - -# Use subprocess as ddup config persists across tests. -@pytest.mark.subprocess( - env=dict( - DD_PROFILING_MAX_FRAMES="5", - DD_PROFILING_OUTPUT_PPROF="/tmp/test_collect_truncate", - ) -) -def test_collect_truncate(): - import os - - from ddtrace.profiling import profiler - from tests.profiling.collector import pprof_utils - from tests.profiling.collector.test_stack import func1 - - pprof_prefix = os.environ["DD_PROFILING_OUTPUT_PPROF"] - output_filename = pprof_prefix + "." + str(os.getpid()) - - max_nframes = int(os.environ["DD_PROFILING_MAX_FRAMES"]) - - p = profiler.Profiler() - p.start() - - func1() - - p.stop() - - profile = pprof_utils.parse_newest_profile(output_filename) - samples = pprof_utils.get_samples_with_value_type(profile, "wall-time") - assert len(samples) > 0 - for sample in samples: - # stack v2 adds one extra frame for "%d frames omitted" message - # Also, it allows max_nframes + 1 frames, so we add 2 here. - assert len(sample.location_id) <= max_nframes + 2, len(sample.location_id) - - -def test_stack_locations(tmp_path): - test_name = "test_stack_locations" - pprof_prefix = str(tmp_path / test_name) - output_filename = pprof_prefix + "." 
+ str(os.getpid()) - - assert ddup.is_available - ddup.config(env="test", service=test_name, version="my_version", output_filename=pprof_prefix) - ddup.start() - - def baz(): - time.sleep(0.1) - - def bar(): - baz() - - def foo(): - bar() - - with stack.StackCollector(): - for _ in range(10): - foo() - ddup.upload() - - profile = pprof_utils.parse_newest_profile(output_filename) - samples = pprof_utils.get_samples_with_value_type(profile, "wall-time") - assert len(samples) > 0 - - expected_sample = pprof_utils.StackEvent( - thread_id=_thread.get_ident(), - thread_name="MainThread", - locations=[ - pprof_utils.StackLocation( - function_name="baz", - filename="test_stack.py", - line_no=baz.__code__.co_firstlineno + 1, - ), - pprof_utils.StackLocation( - function_name="bar", - filename="test_stack.py", - line_no=bar.__code__.co_firstlineno + 1, - ), - pprof_utils.StackLocation( - function_name="foo", - filename="test_stack.py", - line_no=foo.__code__.co_firstlineno + 1, - ), - ], - ) - - pprof_utils.assert_profile_has_sample(profile, samples=samples, expected_sample=expected_sample) - - -def test_push_span(tmp_path, tracer): - test_name = "test_push_span" - pprof_prefix = str(tmp_path / test_name) - output_filename = pprof_prefix + "." + str(os.getpid()) - - tracer._endpoint_call_counter_span_processor.enable() - - assert ddup.is_available - ddup.config(env="test", service=test_name, version="my_version", output_filename=pprof_prefix) - ddup.start() - - resource = str(uuid.uuid4()) - span_type = ext.SpanTypes.WEB - - with stack.StackCollector( - tracer=tracer, - ): - with tracer.trace("foobar", resource=resource, span_type=span_type) as span: - span_id = span.span_id - local_root_span_id = span._local_root.span_id - for _ in range(10): - time.sleep(0.1) - ddup.upload(tracer=tracer) - - profile = pprof_utils.parse_newest_profile(output_filename) - samples = pprof_utils.get_samples_with_label_key(profile, "span id") - assert len(samples) > 0 - for sample in samples: - pprof_utils.assert_stack_event( - profile, - sample, - expected_event=pprof_utils.StackEvent( - span_id=span_id, - local_root_span_id=local_root_span_id, - trace_type=span_type, - trace_endpoint=resource, - ), - ) - - -def test_push_span_unregister_thread(tmp_path, monkeypatch, tracer): - with patch("ddtrace.internal.datadog.profiling.stack_v2.unregister_thread") as unregister_thread: - tracer._endpoint_call_counter_span_processor.enable() - - test_name = "test_push_span_unregister_thread" - pprof_prefix = str(tmp_path / test_name) - output_filename = pprof_prefix + "." 
+ str(os.getpid()) - - assert ddup.is_available - ddup.config(env="test", service=test_name, version="my_version", output_filename=pprof_prefix) - ddup.start() - - resource = str(uuid.uuid4()) - span_type = ext.SpanTypes.WEB - - def target_fun(): - for _ in range(10): - time.sleep(0.1) - - with stack.StackCollector( - tracer=tracer, - ): - with tracer.trace("foobar", resource=resource, span_type=span_type) as span: - span_id = span.span_id - local_root_span_id = span._local_root.span_id - t = threading.Thread(target=target_fun) - t.start() - t.join() - thread_id = t.ident - ddup.upload(tracer=tracer) - - profile = pprof_utils.parse_newest_profile(output_filename) - samples = pprof_utils.get_samples_with_label_key(profile, "span id") - assert len(samples) > 0 - for sample in samples: - pprof_utils.assert_stack_event( - profile, - sample, - expected_event=pprof_utils.StackEvent( - span_id=span_id, - local_root_span_id=local_root_span_id, - trace_type=span_type, - trace_endpoint=resource, - ), - ) - - unregister_thread.assert_called_with(thread_id) - - -def test_push_non_web_span(tmp_path, tracer): - tracer._endpoint_call_counter_span_processor.enable() - - test_name = "test_push_non_web_span" - pprof_prefix = str(tmp_path / test_name) - output_filename = pprof_prefix + "." + str(os.getpid()) - - assert ddup.is_available - ddup.config(env="test", service=test_name, version="my_version", output_filename=pprof_prefix) - ddup.start() - - resource = str(uuid.uuid4()) - span_type = ext.SpanTypes.SQL - - with stack.StackCollector( - tracer=tracer, - ): - with tracer.trace("foobar", resource=resource, span_type=span_type) as span: - span_id = span.span_id - local_root_span_id = span._local_root.span_id - for _ in range(10): - time.sleep(0.1) - ddup.upload(tracer=tracer) - - profile = pprof_utils.parse_newest_profile(output_filename) - samples = pprof_utils.get_samples_with_label_key(profile, "span id") - assert len(samples) > 0 - for sample in samples: - pprof_utils.assert_stack_event( - profile, - sample, - expected_event=pprof_utils.StackEvent( - span_id=span_id, - local_root_span_id=local_root_span_id, - trace_type=span_type, - # trace_endpoint is not set for non-web spans - ), - ) - - -def test_push_span_none_span_type(tmp_path, tracer): - # Test for https://github.com/DataDog/dd-trace-py/issues/11141 - test_name = "test_push_span_none_span_type" - pprof_prefix = str(tmp_path / test_name) - output_filename = pprof_prefix + "." + str(os.getpid()) - - assert ddup.is_available - ddup.config(env="test", service=test_name, version="my_version", output_filename=pprof_prefix) - ddup.start() - - tracer._endpoint_call_counter_span_processor.enable() - - resource = str(uuid.uuid4()) - - with stack.StackCollector( - tracer=tracer, - ): - # Explicitly set None span_type as the default could change in the - # future. 
- with tracer.trace("foobar", resource=resource, span_type=None) as span: - span_id = span.span_id - local_root_span_id = span._local_root.span_id - for _ in range(10): - time.sleep(0.1) - ddup.upload(tracer=tracer) - - profile = pprof_utils.parse_newest_profile(output_filename) - samples = pprof_utils.get_samples_with_label_key(profile, "span id") - assert len(samples) > 0 - for sample in samples: - pprof_utils.assert_stack_event( - profile, - sample, - expected_event=pprof_utils.StackEvent( - span_id=span_id, - local_root_span_id=local_root_span_id, - # span_type is None - # trace_endpoint is not set for non-web spans - ), - ) - - -def test_exception_collection(tmp_path): - test_name = "test_exception_collection" - pprof_prefix = str(tmp_path / test_name) - output_filename = pprof_prefix + "." + str(os.getpid()) - - assert ddup.is_available - ddup.config(env="test", service=test_name, version="my_version", output_filename=pprof_prefix) - ddup.start() - - with stack.StackCollector(): - try: - raise ValueError("hello") - except Exception: - time.sleep(1) - - ddup.upload() - - profile = pprof_utils.parse_newest_profile(output_filename) - samples = pprof_utils.get_samples_with_label_key(profile, "exception type") - - # DEV: update the test once we have exception profiling for stack v2 - # using echion - assert len(samples) == 0 - - -def test_exception_collection_threads(tmp_path): - test_name = "test_exception_collection_threads" - pprof_prefix = str(tmp_path / test_name) - output_filename = pprof_prefix + "." + str(os.getpid()) - - assert ddup.is_available - ddup.config(env="test", service=test_name, version="my_version", output_filename=pprof_prefix) - ddup.start() - - with stack.StackCollector(): - - def target_fun(): - try: - raise ValueError("hello") - except Exception: - time.sleep(1) - - threads = [] - for _ in range(10): - t = threading.Thread(target=target_fun) - threads.append(t) - t.start() - - for t in threads: - t.join() - - ddup.upload() - - profile = pprof_utils.parse_newest_profile(output_filename) - samples = pprof_utils.get_samples_with_label_key(profile, "exception type") - - assert len(samples) == 0 - - -def test_exception_collection_trace(tmp_path, tracer): - test_name = "test_exception_collection_trace" - pprof_prefix = str(tmp_path / test_name) - output_filename = pprof_prefix + "." + str(os.getpid()) - - tracer._endpoint_call_counter_span_processor.enable() - - assert ddup.is_available - ddup.config(env="test", service=test_name, version="my_version", output_filename=pprof_prefix) - ddup.start() - - with stack.StackCollector(tracer=tracer): - with tracer.trace("foobar", resource="resource", span_type=ext.SpanTypes.WEB): - try: - raise ValueError("hello") - except Exception: - time.sleep(1) - - ddup.upload(tracer=tracer) - - profile = pprof_utils.parse_newest_profile(output_filename) - samples = pprof_utils.get_samples_with_label_key(profile, "exception type") - - assert len(samples) == 0 - - -def test_collect_once_with_class(tmp_path): - class SomeClass(object): - @classmethod - def sleep_class(cls): - return cls().sleep_instance() - - def sleep_instance(self): - for _ in range(10): - time.sleep(0.1) - - test_name = "test_collect_once_with_class" - pprof_prefix = str(tmp_path / test_name) - output_filename = pprof_prefix + "." 
+ str(os.getpid()) - - assert ddup.is_available - ddup.config(env="test", service=test_name, version="my_version", output_filename=pprof_prefix) - ddup.start() - - with stack.StackCollector(): - SomeClass.sleep_class() - - ddup.upload() - - profile = pprof_utils.parse_newest_profile(output_filename) - samples = pprof_utils.get_samples_with_value_type(profile, "wall-time") - assert len(samples) > 0 - - pprof_utils.assert_profile_has_sample( - profile, - samples=samples, - expected_sample=pprof_utils.StackEvent( - thread_id=_thread.get_ident(), - thread_name="MainThread", - locations=[ - pprof_utils.StackLocation( - function_name="sleep_instance", - filename="test_stack.py", - line_no=SomeClass.sleep_instance.__code__.co_firstlineno + 2, - ), - pprof_utils.StackLocation( - function_name="sleep_class", - filename="test_stack.py", - line_no=SomeClass.sleep_class.__code__.co_firstlineno + 2, - ), - pprof_utils.StackLocation( - function_name="test_collect_once_with_class", - filename="test_stack.py", - line_no=test_collect_once_with_class.__code__.co_firstlineno + 19, - ), - ], - ), - ) - - -def test_collect_once_with_class_not_right_type(tmp_path): - class SomeClass(object): - @classmethod - def sleep_class(foobar, cls): - return foobar().sleep_instance(cls) - - def sleep_instance(foobar, self): - for _ in range(10): - time.sleep(0.1) - - test_name = "test_collect_once_with_class" - pprof_prefix = str(tmp_path / test_name) - output_filename = pprof_prefix + "." + str(os.getpid()) - - assert ddup.is_available - ddup.config(env="test", service=test_name, version="my_version", output_filename=pprof_prefix) - ddup.start() - - with stack.StackCollector(): - SomeClass.sleep_class(123) - - ddup.upload() - - profile = pprof_utils.parse_newest_profile(output_filename) - samples = pprof_utils.get_samples_with_value_type(profile, "wall-time") - assert len(samples) > 0 - - pprof_utils.assert_profile_has_sample( - profile, - samples=samples, - expected_sample=pprof_utils.StackEvent( - thread_id=_thread.get_ident(), - thread_name="MainThread", - locations=[ - pprof_utils.StackLocation( - function_name="sleep_instance", - filename="test_stack.py", - line_no=SomeClass.sleep_instance.__code__.co_firstlineno + 2, - ), - pprof_utils.StackLocation( - function_name="sleep_class", - filename="test_stack.py", - line_no=SomeClass.sleep_class.__code__.co_firstlineno + 2, - ), - pprof_utils.StackLocation( - function_name="test_collect_once_with_class_not_right_type", - filename="test_stack.py", - line_no=test_collect_once_with_class_not_right_type.__code__.co_firstlineno + 19, - ), - ], - ), - ) - - -def _fib(n): - if n == 1: - return 1 - elif n == 0: - return 0 - else: - return _fib(n - 1) + _fib(n - 2) - - -@pytest.mark.skipif(not TESTING_GEVENT, reason="Not testing gevent") -@pytest.mark.subprocess(ddtrace_run=True) -def test_collect_gevent_thread_task(): - # TODO(taegyunkim): update echion to support gevent and test with stack v2 - - from gevent import monkey - - monkey.patch_all() - - import os - import threading - import time - - from ddtrace.internal.datadog.profiling import ddup - from ddtrace.profiling.collector import stack - from tests.profiling.collector import pprof_utils - from tests.profiling_v2.collector.test_stack import _fib - - test_name = "test_collect_gevent_thread_task" - pprof_prefix = "/tmp/" + test_name - output_filename = pprof_prefix + "." 
+ str(os.getpid())
-
-    assert ddup.is_available
-    ddup.config(env="test", service=test_name, version="my_version", output_filename=pprof_prefix)
-    ddup.start()
-
-    # Start some (green)threads
-    def _dofib():
-        for _ in range(5):
-            # spend some time in CPU so the profiler can catch something
-            # On a Mac w/ Apple M3 MAX with Python 3.11 it takes about 200ms to calculate _fib(32)
-            # And _fib() is called 5 times so it should take about 1 second
-            # We use 5 threads below so it should take about 5 seconds
-            _fib(32)
-            # Just make sure gevent switches threads/greenlets
-            time.sleep(0)
-
-    threads = []
-
-    with stack.StackCollector():
-        for i in range(5):
-            t = threading.Thread(target=_dofib, name="TestThread %d" % i)
-            t.start()
-            threads.append(t)
-        for t in threads:
-            t.join()
-
-    ddup.upload()
-
-    profile = pprof_utils.parse_newest_profile(output_filename)
-    samples = pprof_utils.get_samples_with_label_key(profile, "task name")
-    assert len(samples) > 0
-
-    pprof_utils.assert_profile_has_sample(
-        profile,
-        samples,
-        expected_sample=pprof_utils.StackEvent(
-            thread_name="MainThread",
-            task_name=r"Greenlet-\d+$",
-            locations=[
-                # Since we're using recursive function _fib(), we expect to have
-                # multiple locations for _fib(n) = _fib(n-1) + _fib(n-2)
-                pprof_utils.StackLocation(
-                    filename="test_stack.py",
-                    function_name="_fib",
-                    line_no=_fib.__code__.co_firstlineno + 6,
-                ),
-                pprof_utils.StackLocation(
-                    filename="test_stack.py",
-                    function_name="_fib",
-                    line_no=_fib.__code__.co_firstlineno + 6,
-                ),
-                pprof_utils.StackLocation(
-                    filename="test_stack.py",
-                    function_name="_fib",
-                    line_no=_fib.__code__.co_firstlineno + 6,
-                ),
-            ],
-        ),
-    )
-
-
-def test_repr():
-    test_collector._test_repr(
-        stack.StackCollector,
-        "StackCollector(status=<ServiceStatus.STOPPED: 'stopped'>, nframes=64, tracer=None)",
-    )
diff --git a/tests/profiling_v2/collector/test_stack_asyncio.py b/tests/profiling_v2/collector/test_stack_asyncio.py
deleted file mode 100644
index a13ea2aaad9..00000000000
--- a/tests/profiling_v2/collector/test_stack_asyncio.py
+++ /dev/null
@@ -1,872 +0,0 @@
-import pytest
-
-
-@pytest.mark.subprocess(
-    env=dict(
-        DD_PROFILING_OUTPUT_PPROF="/tmp/test_stack_asyncio",
-    ),
-    err=None,
-)
-# For macOS: err=None ignores expected stderr from tracer failing to connect to agent (not relevant to this test)
-def test_asyncio():
-    import asyncio
-    import os
-    import time
-    import uuid
-
-    from ddtrace import ext
-    from ddtrace.internal.datadog.profiling import stack_v2
-    from ddtrace.profiling import profiler
-    from ddtrace.trace import tracer
-    from tests.profiling.collector import pprof_utils
-
-    assert stack_v2.is_available, stack_v2.failure_msg
-
-    sleep_time = 0.2
-    loop_run_time = 3
-
-    async def stuff() -> None:
-        start_time = time.time()
-        while time.time() < start_time + loop_run_time:
-            await asyncio.sleep(sleep_time)
-
-    async def hello():
-        t1 = asyncio.create_task(stuff(), name="sleep 1")
-        t2 = asyncio.create_task(stuff(), name="sleep 2")
-        await stuff()
-        return (t1, t2)
-
-    resource = str(uuid.uuid4())
-    span_type = ext.SpanTypes.WEB
-
-    p = profiler.Profiler(tracer=tracer)
-    p.start()
-    with tracer.trace("test_asyncio", resource=resource, span_type=span_type) as span:
-        span_id = span.span_id
-        local_root_span_id = span._local_root.span_id
-
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
-        maintask = loop.create_task(hello(), name="main")
-
-        t1, t2 = loop.run_until_complete(maintask)
-    p.stop()
-
-    t1_name = t1.get_name()
-    t2_name = t2.get_name()
-
-    assert t1_name == "sleep 1"
-
assert t2_name == "sleep 2" - - output_filename = os.environ["DD_PROFILING_OUTPUT_PPROF"] + "." + str(os.getpid()) - - profile = pprof_utils.parse_newest_profile(output_filename) - - samples_with_span_id = pprof_utils.get_samples_with_label_key(profile, "span id") - assert len(samples_with_span_id) > 0 - - # get samples with task_name - samples = pprof_utils.get_samples_with_label_key(profile, "task name") - # The next fails if stack_v2 is not properly configured with asyncio task - # tracking via ddtrace.profiling._asyncio - assert len(samples) > 0 - - pprof_utils.assert_profile_has_sample( - profile, - samples, - expected_sample=pprof_utils.StackEvent( - thread_name="MainThread", - task_name="main", - span_id=span_id, - local_root_span_id=local_root_span_id, - locations=[ - pprof_utils.StackLocation( - function_name="hello", filename="test_stack_asyncio.py", line_no=hello.__code__.co_firstlineno + 3 - ) - ], - ), - ) - - pprof_utils.assert_profile_has_sample( - profile, - samples, - expected_sample=pprof_utils.StackEvent( - thread_name="MainThread", - task_name=t1_name, - span_id=span_id, - local_root_span_id=local_root_span_id, - locations=[ - pprof_utils.StackLocation( - function_name="stuff", filename="test_stack_asyncio.py", line_no=stuff.__code__.co_firstlineno + 3 - ), - ], - ), - ) - - pprof_utils.assert_profile_has_sample( - profile, - samples, - expected_sample=pprof_utils.StackEvent( - thread_name="MainThread", - task_name=t2_name, - span_id=span_id, - local_root_span_id=local_root_span_id, - locations=[ - pprof_utils.StackLocation( - function_name="stuff", filename="test_stack_asyncio.py", line_no=stuff.__code__.co_firstlineno + 3 - ), - ], - ), - ) - - -@pytest.mark.subprocess( - env=dict( - DD_PROFILING_OUTPUT_PPROF="/tmp/test_asyncio_start_profiler_from_process_before_importing_asyncio", - ), - err=None, -) -# For macOS: err=None ignores expected stderr from tracer failing to connect to agent (not relevant to this test) -def test_asyncio_start_profiler_from_process_before_importing_asyncio(): - from ddtrace.internal.datadog.profiling import stack_v2 - from ddtrace.profiling import profiler - - assert stack_v2.is_available, stack_v2.failure_msg - - p = profiler.Profiler() - p.start() - - import asyncio - import os - import sys - import time - - # Start an asyncio loop BEFORE importing profiler modules - # This simulates the bug scenario where a loop exists before profiling is enabled - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - - async def my_function(): - async def background_task_func() -> None: - """Background task that runs in the existing loop.""" - await asyncio.sleep(1.5) - - # Create and start a task in the existing loop - background_task = loop.create_task(background_task_func(), name="background") - assert background_task is not None - - # Run tasks that should be tracked - sleep_time = 0.2 - loop_run_time = 0.75 - - async def tracked_task() -> None: - start_time = time.time() - while time.time() < start_time + loop_run_time: - await asyncio.sleep(sleep_time) - - async def main_task(): - t1 = asyncio.create_task(tracked_task(), name="tracked 1") - t2 = asyncio.create_task(tracked_task(), name="tracked 2") - await tracked_task() - await asyncio.sleep(0.25) - return t1, t2 - - result = await main_task() - - await background_task - - return tracked_task, background_task_func, result - - main_task = loop.create_task(my_function(), name="main") - tracked_task_def, background_task_def, (t1, t2) = loop.run_until_complete(main_task) - - p.stop() - - 
t1_name = t1.get_name()
-    t2_name = t2.get_name()
-
-    assert t1_name == "tracked 1"
-    assert t2_name == "tracked 2"
-
-    from tests.profiling.collector import pprof_utils
-
-    output_filename = os.environ["DD_PROFILING_OUTPUT_PPROF"] + "." + str(os.getpid())
-    profile = pprof_utils.parse_newest_profile(output_filename)
-
-    samples = pprof_utils.get_samples_with_label_key(profile, "task name")
-    assert len(samples) > 0, "No task names found - existing loop was not tracked!"
-
-    if sys.version_info >= (3, 11):
-        EXPECTED_FUNCTION_NAME_BACKGROUND = f"{my_function.__name__}.<locals>.{background_task_def.__name__}"
-    else:
-        EXPECTED_FUNCTION_NAME_BACKGROUND = background_task_def.__name__
-    EXPECTED_FILENAME_BACKGROUND = os.path.basename(background_task_def.__code__.co_filename)
-    EXPECTED_LINE_NO_BACKGROUND = background_task_def.__code__.co_firstlineno + 2
-
-    pprof_utils.assert_profile_has_sample(
-        profile,
-        samples,
-        expected_sample=pprof_utils.StackEvent(
-            thread_name="MainThread",
-            task_name="background",
-            locations=[
-                pprof_utils.StackLocation(
-                    function_name=EXPECTED_FUNCTION_NAME_BACKGROUND,
-                    filename=EXPECTED_FILENAME_BACKGROUND,
-                    line_no=EXPECTED_LINE_NO_BACKGROUND,
-                ),
-            ],
-        ),
-    )
-
-    # Verify specific tasks are in the profile
-    if sys.version_info >= (3, 11):
-        EXPECTED_FUNCTION_NAME_TRACKED = f"{my_function.__name__}.<locals>.{tracked_task_def.__name__}"
-    else:
-        EXPECTED_FUNCTION_NAME_TRACKED = tracked_task_def.__name__
-    EXPECTED_FILENAME_TRACKED = os.path.basename(tracked_task_def.__code__.co_filename)
-    EXPECTED_LINE_NO_TRACKED = tracked_task_def.__code__.co_firstlineno + 3
-
-    pprof_utils.assert_profile_has_sample(
-        profile,
-        samples,
-        expected_sample=pprof_utils.StackEvent(
-            thread_name="MainThread",
-            task_name=t1_name,
-            locations=[
-                pprof_utils.StackLocation(
-                    function_name=EXPECTED_FUNCTION_NAME_TRACKED,
-                    filename=EXPECTED_FILENAME_TRACKED,
-                    line_no=EXPECTED_LINE_NO_TRACKED,
-                )
-            ],
-        ),
-    )
-
-
-@pytest.mark.subprocess(
-    env=dict(
-        DD_PROFILING_OUTPUT_PPROF="/tmp/test_asyncio_start_profiler_from_process_before_starting_loop",
-    ),
-    err=None,
-)
-# For macOS: err=None ignores expected stderr from tracer failing to connect to agent (not relevant to this test)
-def test_asyncio_start_profiler_from_process_before_starting_loop():
-    import asyncio
-    import os
-    import sys
-    import time
-
-    from ddtrace.internal.datadog.profiling import stack_v2
-    from ddtrace.profiling import profiler
-
-    assert stack_v2.is_available, stack_v2.failure_msg
-
-    p = profiler.Profiler()
-    p.start()
-
-    # Start an asyncio loop BEFORE importing profiler modules
-    # This simulates the bug scenario where a loop exists before profiling is enabled
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-
-    async def my_function():
-        async def background_task_func() -> None:
-            """Background task that runs in the existing loop."""
-            await asyncio.sleep(1.5)
-
-        # Create and start a task in the existing loop
-        background_task = loop.create_task(background_task_func(), name="background")
-        assert background_task is not None
-
-        # Run tasks that should be tracked
-        sleep_time = 0.2
-        loop_run_time = 0.75
-
-        async def tracked_task() -> None:
-            start_time = time.time()
-            while time.time() < start_time + loop_run_time:
-                await asyncio.sleep(sleep_time)
-
-        async def main_task():
-            t1 = asyncio.create_task(tracked_task(), name="tracked 1")
-            t2 = asyncio.create_task(tracked_task(), name="tracked 2")
-            await tracked_task()
-            await asyncio.sleep(0.25)
-            return t1, t2
-
-        result = await main_task()
-
-        await background_task
-
-        return tracked_task, background_task_func, result
-
-    main_task = loop.create_task(my_function(), name="main")
-    tracked_task_def, background_task_def, (t1, t2) = loop.run_until_complete(main_task)
-
-    p.stop()
-
-    t1_name = t1.get_name()
-    t2_name = t2.get_name()
-
-    assert t1_name == "tracked 1"
-    assert t2_name == "tracked 2"
-
-    from tests.profiling.collector import pprof_utils
-
-    output_filename = os.environ["DD_PROFILING_OUTPUT_PPROF"] + "." + str(os.getpid())
-    profile = pprof_utils.parse_newest_profile(output_filename)
-
-    samples = pprof_utils.get_samples_with_label_key(profile, "task name")
-    assert len(samples) > 0, "No task names found - existing loop was not tracked!"
-
-    if sys.version_info >= (3, 11):
-        EXPECTED_FUNCTION_NAME_BACKGROUND = f"{my_function.__name__}.<locals>.{background_task_def.__name__}"
-    else:
-        EXPECTED_FUNCTION_NAME_BACKGROUND = background_task_def.__name__
-    EXPECTED_FILENAME_BACKGROUND = os.path.basename(background_task_def.__code__.co_filename)
-    EXPECTED_LINE_NO_BACKGROUND = background_task_def.__code__.co_firstlineno + 2
-
-    pprof_utils.assert_profile_has_sample(
-        profile,
-        samples,
-        expected_sample=pprof_utils.StackEvent(
-            thread_name="MainThread",
-            task_name="background",
-            locations=[
-                pprof_utils.StackLocation(
-                    function_name=EXPECTED_FUNCTION_NAME_BACKGROUND,
-                    filename=EXPECTED_FILENAME_BACKGROUND,
-                    line_no=EXPECTED_LINE_NO_BACKGROUND,
-                ),
-            ],
-        ),
-    )
-
-    # Verify specific tasks are in the profile
-    if sys.version_info >= (3, 11):
-        EXPECTED_FUNCTION_NAME_TRACKED = f"{my_function.__name__}.<locals>.{tracked_task_def.__name__}"
-    else:
-        EXPECTED_FUNCTION_NAME_TRACKED = tracked_task_def.__name__
-    EXPECTED_FILENAME_TRACKED = os.path.basename(tracked_task_def.__code__.co_filename)
-    EXPECTED_LINE_NO_TRACKED = tracked_task_def.__code__.co_firstlineno + 3
-
-    pprof_utils.assert_profile_has_sample(
-        profile,
-        samples,
-        expected_sample=pprof_utils.StackEvent(
-            thread_name="MainThread",
-            task_name=t1_name,
-            locations=[
-                pprof_utils.StackLocation(
-                    function_name=EXPECTED_FUNCTION_NAME_TRACKED,
-                    filename=EXPECTED_FILENAME_TRACKED,
-                    line_no=EXPECTED_LINE_NO_TRACKED,
-                )
-            ],
-        ),
-    )
-
-
-@pytest.mark.xfail(reason="This test fails because there's no way to get the current loop if it's not already running.")
-@pytest.mark.subprocess(
-    env=dict(
-        DD_PROFILING_OUTPUT_PPROF="/tmp/test_asyncio_start_profiler_from_process_after_creating_loop",
-    ),
-    err=None,
-)
-# For macOS: err=None ignores expected stderr from tracer failing to connect to agent (not relevant to this test)
-def test_asyncio_start_profiler_from_process_after_creating_loop():
-    import asyncio
-    import os
-    import sys
-    import time
-
-    from ddtrace.internal.datadog.profiling import stack_v2
-    from ddtrace.profiling import profiler
-
-    # Start an asyncio loop BEFORE importing profiler modules
-    # This simulates the bug scenario where a loop exists before profiling is enabled
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-
-    assert stack_v2.is_available, stack_v2.failure_msg
-
-    p = profiler.Profiler()
-    p.start()
-
-    async def my_function():
-        async def background_task_func() -> None:
-            """Background task that runs in the existing loop."""
-            await asyncio.sleep(1.5)
-
-        # Create and start a task in the existing loop
-        background_task = loop.create_task(background_task_func(), name="background")
-        assert background_task is not None
-
-        # Run tasks that should be tracked
-        sleep_time = 0.2
-        loop_run_time = 0.75
-
-        async def tracked_task() -> None:
-            start_time = time.time()
-            while time.time() < start_time + loop_run_time:
-                await asyncio.sleep(sleep_time)
-
-        async def main_task():
-            t1 = asyncio.create_task(tracked_task(), name="tracked 1")
-            t2 = asyncio.create_task(tracked_task(), name="tracked 2")
-            await tracked_task()
-            await asyncio.sleep(0.25)
-            return t1, t2
-
-        result = await main_task()
-
-        await background_task
-
-        return tracked_task, background_task_func, result
-
-    main_task = loop.create_task(my_function(), name="main")
-    tracked_task_def, background_task_def, (t1, t2) = loop.run_until_complete(main_task)
-
-    p.stop()
-
-    t1_name = t1.get_name()
-    t2_name = t2.get_name()
-
-    assert t1_name == "tracked 1"
-    assert t2_name == "tracked 2"
-
-    from tests.profiling.collector import pprof_utils
-
-    output_filename = os.environ["DD_PROFILING_OUTPUT_PPROF"] + "." + str(os.getpid())
-    profile = pprof_utils.parse_newest_profile(output_filename)
-
-    samples = pprof_utils.get_samples_with_label_key(profile, "task name")
-    assert len(samples) > 0, "No task names found - existing loop was not tracked!"
-
-    EXPECTED_FILENAME_BACKGROUND = os.path.basename(background_task_def.__code__.co_filename)
-    EXPECTED_LINE_NO_BACKGROUND = background_task_def.__code__.co_firstlineno + 2
-    if sys.version_info >= (3, 11):
-        EXPECTED_FUNCTION_NAME_BACKGROUND = f"{my_function.__name__}.<locals>.{background_task_def.__name__}"
-    else:
-        EXPECTED_FUNCTION_NAME_BACKGROUND = background_task_def.__name__
-
-    pprof_utils.assert_profile_has_sample(
-        profile,
-        samples,
-        expected_sample=pprof_utils.StackEvent(
-            thread_name="MainThread",
-            task_name="background",
-            locations=[
-                pprof_utils.StackLocation(
-                    function_name=EXPECTED_FUNCTION_NAME_BACKGROUND,
-                    filename=EXPECTED_FILENAME_BACKGROUND,
-                    line_no=EXPECTED_LINE_NO_BACKGROUND,
-                ),
-            ],
-        ),
-    )
-
-    # Verify specific tasks are in the profile
-    EXPECTED_FILENAME_TRACKED = os.path.basename(tracked_task_def.__code__.co_filename)
-    EXPECTED_LINE_NO_TRACKED = tracked_task_def.__code__.co_firstlineno + 3
-    if sys.version_info >= (3, 11):
-        EXPECTED_FUNCTION_NAME_TRACKED = f"{my_function.__name__}.<locals>.{tracked_task_def.__name__}"
-    else:
-        EXPECTED_FUNCTION_NAME_TRACKED = tracked_task_def.__name__
-
-    pprof_utils.assert_profile_has_sample(
-        profile,
-        samples,
-        expected_sample=pprof_utils.StackEvent(
-            thread_name="MainThread",
-            task_name=t1_name,
-            locations=[
-                pprof_utils.StackLocation(
-                    function_name=EXPECTED_FUNCTION_NAME_TRACKED,
-                    filename=EXPECTED_FILENAME_TRACKED,
-                    line_no=EXPECTED_LINE_NO_TRACKED,
-                )
-            ],
-        ),
-    )
-
-
-@pytest.mark.xfail(reason="This test fails because there's no way to get the current loop if it's not already running.")
-@pytest.mark.subprocess(
-    env=dict(
-        DD_PROFILING_OUTPUT_PPROF="/tmp/test_asyncio_import_profiler_from_process_after_starting_loop",
-    ),
-    err=None,
-)
-# For macOS: err=None ignores expected stderr from tracer failing to connect to agent (not relevant to this test)
-def test_asyncio_import_profiler_from_process_after_starting_loop():
-    import asyncio
-    import os
-    import sys
-    import time
-
-    # Start an asyncio loop BEFORE importing profiler modules
-    # This simulates the bug scenario where a loop exists before profiling is enabled
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-
-    from ddtrace.internal.datadog.profiling import stack_v2
-    from ddtrace.profiling import profiler
-
-    assert stack_v2.is_available, stack_v2.failure_msg
-
-    p = profiler.Profiler()
-    p.start()
-
-    async def my_function():
-        async def background_task_func() -> None:
-            """Background task that runs in the existing loop."""
-            await asyncio.sleep(1.5)
-
-        # Create and start a task in the existing loop
-        background_task = loop.create_task(background_task_func(), name="background")
-        assert background_task is not None
-
-        # Run tasks that should be tracked
-        sleep_time = 0.2
-        loop_run_time = 0.75
-
-        async def tracked_task() -> None:
-            start_time = time.time()
-            while time.time() < start_time + loop_run_time:
-                await asyncio.sleep(sleep_time)
-
-        async def main_task():
-            t1 = asyncio.create_task(tracked_task(), name="tracked 1")
-            t2 = asyncio.create_task(tracked_task(), name="tracked 2")
-            await tracked_task()
-            await asyncio.sleep(0.25)
-            return t1, t2
-
-        result = await main_task()
-
-        await background_task
-
-        return tracked_task, background_task_func, result
-
-    main_task = loop.create_task(my_function(), name="main")
-    tracked_task_def, background_task_def, (t1, t2) = loop.run_until_complete(main_task)
-
-    p.stop()
-
-    t1_name = t1.get_name()
-    t2_name = t2.get_name()
-
-    assert t1_name == "tracked 1"
-    assert t2_name == "tracked 2"
-
-    from tests.profiling.collector import pprof_utils
-
-    output_filename = os.environ["DD_PROFILING_OUTPUT_PPROF"] + "." + str(os.getpid())
-    profile = pprof_utils.parse_newest_profile(output_filename)
-
-    samples = pprof_utils.get_samples_with_label_key(profile, "task name")
-    assert len(samples) > 0, "No task names found - existing loop was not tracked!"
-
-    EXPECTED_FILENAME_BACKGROUND = os.path.basename(background_task_def.__code__.co_filename)
-    EXPECTED_LINE_NO_BACKGROUND = background_task_def.__code__.co_firstlineno
-    if sys.version_info >= (3, 11):
-        EXPECTED_FUNCTION_NAME_BACKGROUND = f"{my_function.__name__}.<locals>.{background_task_def.__name__}"
-    else:
-        EXPECTED_FUNCTION_NAME_BACKGROUND = background_task_def.__name__
-
-    pprof_utils.assert_profile_has_sample(
-        profile,
-        samples,
-        expected_sample=pprof_utils.StackEvent(
-            thread_name="MainThread",
-            task_name="background",
-            locations=[
-                pprof_utils.StackLocation(
-                    function_name=EXPECTED_FUNCTION_NAME_BACKGROUND,
-                    filename=EXPECTED_FILENAME_BACKGROUND,
-                    line_no=EXPECTED_LINE_NO_BACKGROUND,
-                ),
-            ],
-        ),
-    )
-
-    # Verify specific tasks are in the profile
-    EXPECTED_FILENAME_TRACKED = os.path.basename(tracked_task_def.__code__.co_filename)
-    EXPECTED_LINE_NO_TRACKED = tracked_task_def.__code__.co_firstlineno + 3
-    if sys.version_info >= (3, 11):
-        EXPECTED_FUNCTION_NAME_TRACKED = f"{my_function.__name__}.<locals>.{tracked_task_def.__name__}"
-    else:
-        EXPECTED_FUNCTION_NAME_TRACKED = tracked_task_def.__name__
-
-    pprof_utils.assert_profile_has_sample(
-        profile,
-        samples,
-        expected_sample=pprof_utils.StackEvent(
-            thread_name="MainThread",
-            task_name=t1_name,
-            locations=[
-                pprof_utils.StackLocation(
-                    function_name=EXPECTED_FUNCTION_NAME_TRACKED,
-                    filename=EXPECTED_FILENAME_TRACKED,
-                    line_no=EXPECTED_LINE_NO_TRACKED,
-                )
-            ],
-        ),
-    )
-
-
-@pytest.mark.subprocess(
-    env=dict(
-        DD_PROFILING_OUTPUT_PPROF="/tmp/test_asyncio_start_profiler_from_process_after_creating_loop_and_task",
-    ),
-    err=None,
-)
-def test_asyncio_start_profiler_from_process_after_task_start():
-    # NOW import profiling modules - this should track the existing loop
-    import asyncio
-    import os
-    import sys
-    import time
-
-    from ddtrace.internal.datadog.profiling import stack_v2
-    from ddtrace.profiling import profiler
-
-    # Start an asyncio loop BEFORE importing profiler modules
-    # This simulates the bug scenario where a loop exists before profiling is
enabled - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - - async def my_function(): - async def background_task_func() -> None: - """Background task that runs in the existing loop.""" - await asyncio.sleep(1.5) - - # Create and start a task in the existing loop - background_task = loop.create_task(background_task_func(), name="background") - assert background_task is not None - - # Start profiler after loop is already running - assert asyncio.get_running_loop() is loop - - assert stack_v2.is_available, stack_v2.failure_msg - - p = profiler.Profiler() - p.start() - - # Run tasks that should be tracked - sleep_time = 0.2 - loop_run_time = 0.75 - - async def tracked_task() -> None: - start_time = time.time() - while time.time() < start_time + loop_run_time: - await asyncio.sleep(sleep_time) - - async def main_task(): - t1 = asyncio.create_task(tracked_task(), name="tracked 1") - t2 = asyncio.create_task(tracked_task(), name="tracked 2") - await tracked_task() - await asyncio.sleep(0.25) - return t1, t2 - - result = await main_task() - - await background_task - - return tracked_task, background_task_func, p, result - - main_task = loop.create_task(my_function(), name="main") - tracked_task_def, background_task_def, p, (t1, t2) = loop.run_until_complete(main_task) - - p.stop() - - t1_name = t1.get_name() - t2_name = t2.get_name() - - assert t1_name == "tracked 1" - assert t2_name == "tracked 2" - - from tests.profiling.collector import pprof_utils - - output_filename = os.environ["DD_PROFILING_OUTPUT_PPROF"] + "." + str(os.getpid()) - profile = pprof_utils.parse_newest_profile(output_filename) - - samples = pprof_utils.get_samples_with_label_key(profile, "task name") - assert len(samples) > 0, "No task names found - existing loop was not tracked!" 
- - EXPECTED_FILENAME_BACKGROUND = os.path.basename(background_task_def.__code__.co_filename) - EXPECTED_LINE_NO_BACKGROUND = background_task_def.__code__.co_firstlineno - if sys.version_info >= (3, 11): - EXPECTED_FUNCTION_NAME_BACKGROUND = f"{my_function.__name__}.<locals>.{background_task_def.__name__}" - else: - EXPECTED_FUNCTION_NAME_BACKGROUND = background_task_def.__name__ - - pprof_utils.assert_profile_has_sample( - profile, - samples, - expected_sample=pprof_utils.StackEvent( - thread_name="MainThread", - task_name="background", - locations=[ - pprof_utils.StackLocation( - function_name=EXPECTED_FUNCTION_NAME_BACKGROUND, - filename=EXPECTED_FILENAME_BACKGROUND, - line_no=EXPECTED_LINE_NO_BACKGROUND, - ), - ], - ), - ) - - # Verify specific tasks are in the profile - EXPECTED_FILENAME_TRACKED = os.path.basename(tracked_task_def.__code__.co_filename) - EXPECTED_LINE_NO_TRACKED = tracked_task_def.__code__.co_firstlineno + 3 - if sys.version_info >= (3, 11): - EXPECTED_FUNCTION_NAME_TRACKED = f"{my_function.__name__}.<locals>.{tracked_task_def.__name__}" - else: - EXPECTED_FUNCTION_NAME_TRACKED = tracked_task_def.__name__ - - pprof_utils.assert_profile_has_sample( - profile, - samples, - expected_sample=pprof_utils.StackEvent( - thread_name="MainThread", - task_name=t1_name, - locations=[ - pprof_utils.StackLocation( - function_name=EXPECTED_FUNCTION_NAME_TRACKED, - filename=EXPECTED_FILENAME_TRACKED, - line_no=EXPECTED_LINE_NO_TRACKED, - ) - ], - ), - ) - - -@pytest.mark.subprocess( - env=dict( - DD_PROFILING_OUTPUT_PPROF="/tmp/test_asyncio_start_profiler_from_process_after_task_start", - ), - err=None, -) -def test_asyncio_import_and_start_profiler_from_process_after_task_start(): - import asyncio - import os - import sys - import time - - # Start an asyncio loop BEFORE importing profiler modules - # This simulates the bug scenario where a loop exists before profiling is enabled - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - - async def my_function(): - async def background_task_func() -> None: - """Background task that runs in the existing loop.""" - await asyncio.sleep(1.5) - - # Create and start a task in the existing loop - background_task = loop.create_task(background_task_func(), name="background") - assert background_task is not None - - # Start profiler after loop is already running - assert asyncio.get_running_loop() is loop - - # NOW import profiling modules - this should track the existing loop - from ddtrace.internal.datadog.profiling import stack_v2 - from ddtrace.profiling import profiler - - assert stack_v2.is_available, stack_v2.failure_msg - - p = profiler.Profiler() - p.start() - - # Run tasks that should be tracked - sleep_time = 0.2 - loop_run_time = 0.75 - - async def tracked_task() -> None: - start_time = time.time() - while time.time() < start_time + loop_run_time: - await asyncio.sleep(sleep_time) - - async def main_task(): - t1 = asyncio.create_task(tracked_task(), name="tracked 1") - t2 = asyncio.create_task(tracked_task(), name="tracked 2") - await tracked_task() - await asyncio.sleep(0.25) - return t1, t2 - - result = await main_task() - - await background_task - - return tracked_task, background_task_func, p, result - - main_task = loop.create_task(my_function(), name="main") - tracked_task_def, background_task_def, p, (t1, t2) = loop.run_until_complete(main_task) - - p.stop() - - t1_name = t1.get_name() - t2_name = t2.get_name() - - assert t1_name == "tracked 1" - assert t2_name == "tracked 2" - - from tests.profiling.collector import pprof_utils - 
- output_filename = os.environ["DD_PROFILING_OUTPUT_PPROF"] + "." + str(os.getpid()) - profile = pprof_utils.parse_newest_profile(output_filename) - - samples = pprof_utils.get_samples_with_label_key(profile, "task name") - assert len(samples) > 0, "No task names found - existing loop was not tracked!" - - if sys.version_info >= (3, 11): - EXPECTED_FUNCTION_NAME_BACKGROUND = f"{my_function.__name__}.<locals>.{background_task_def.__name__}" - else: - EXPECTED_FUNCTION_NAME_BACKGROUND = background_task_def.__name__ - EXPECTED_FILENAME_BACKGROUND = os.path.basename(background_task_def.__code__.co_filename) - EXPECTED_LINE_NO_BACKGROUND = background_task_def.__code__.co_firstlineno - - pprof_utils.assert_profile_has_sample( - profile, - samples, - expected_sample=pprof_utils.StackEvent( - thread_name="MainThread", - task_name="background", - locations=[ - pprof_utils.StackLocation( - function_name=EXPECTED_FUNCTION_NAME_BACKGROUND, - filename=EXPECTED_FILENAME_BACKGROUND, - line_no=EXPECTED_LINE_NO_BACKGROUND, - ), - ], - ), - ) - - # Verify specific tasks are in the profile - if sys.version_info >= (3, 11): - EXPECTED_FUNCTION_NAME_TRACKED = f"{my_function.__name__}.<locals>.{tracked_task_def.__name__}" - else: - EXPECTED_FUNCTION_NAME_TRACKED = tracked_task_def.__name__ - EXPECTED_FILENAME_TRACKED = os.path.basename(tracked_task_def.__code__.co_filename) - EXPECTED_LINE_NO_TRACKED = tracked_task_def.__code__.co_firstlineno + 3 - - pprof_utils.assert_profile_has_sample( - profile, - samples, - expected_sample=pprof_utils.StackEvent( - thread_name="MainThread", - task_name=t1_name, - locations=[ - pprof_utils.StackLocation( - function_name=EXPECTED_FUNCTION_NAME_TRACKED, - filename=EXPECTED_FILENAME_TRACKED, - line_no=EXPECTED_LINE_NO_TRACKED, - ) - ], - ), - ) diff --git a/tests/profiling_v2/exporter/__init__.py b/tests/profiling_v2/exporter/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/profiling_v2/simple_program.py b/tests/profiling_v2/simple_program.py deleted file mode 100755 index 1ff264c7408..00000000000 --- a/tests/profiling_v2/simple_program.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python -import os -import sys - -from ddtrace.internal import service -from ddtrace.profiling import bootstrap -from ddtrace.profiling.collector import stack - - -for running_collector in bootstrap.profiler._profiler._collectors: - if isinstance(running_collector, stack.StackCollector): - break -else: - raise AssertionError("Unable to find stack collector") - - -print("hello world") -assert running_collector.status == service.ServiceStatus.RUNNING - -# Do some serious memory allocations! 
-for _ in range(5000000): - object() - -print(os.getpid()) -sys.exit(42) diff --git a/tests/profiling_v2/simple_program_fork.py b/tests/profiling_v2/simple_program_fork.py deleted file mode 100644 index 57f3bf81f64..00000000000 --- a/tests/profiling_v2/simple_program_fork.py +++ /dev/null @@ -1,32 +0,0 @@ -import os -import sys -import threading - -from ddtrace.internal import service -import ddtrace.profiling.auto # noqa: F401 -import ddtrace.profiling.bootstrap -import ddtrace.profiling.profiler # noqa: F401 - - -lock = threading.Lock() -lock.acquire() - - -assert ddtrace.profiling.bootstrap.profiler.status == service.ServiceStatus.RUNNING - - -child_pid = os.fork() -if child_pid == 0: - # Release it - lock.release() - - # We track this one though - lock = threading.Lock() - lock.acquire() - lock.release() -else: - lock.release() - assert ddtrace.profiling.bootstrap.profiler.status == service.ServiceStatus.RUNNING - print(child_pid) - pid, status = os.waitpid(child_pid, 0) - sys.exit(os.WEXITSTATUS(status)) diff --git a/tests/profiling_v2/simple_program_gevent.py b/tests/profiling_v2/simple_program_gevent.py deleted file mode 100644 index 1cc85d2c921..00000000000 --- a/tests/profiling_v2/simple_program_gevent.py +++ /dev/null @@ -1,34 +0,0 @@ -# Import from ddtrace before monkey patching to ensure that we grab all the -# necessary references to the unpatched modules. -import ddtrace.auto # noqa: F401, I001 -import ddtrace.profiling.auto # noqa:F401 - - -import gevent.monkey # noqa:F402 - -gevent.monkey.patch_all() - -import threading # noqa: E402, F402, I001 -import time # noqa: E402, F402 - - -def fibonacci(n): - if n == 0: - return 0 - elif n == 1: - return 1 - else: - return fibonacci(n - 1) + fibonacci(n - 2) - - -i = 1 -for _ in range(20): - threads = [] - for _ in range(10): - t = threading.Thread(target=fibonacci, args=(i,)) - t.start() - threads.append(t) - i += 1 - for t in threads: - t.join() - time.sleep(0.1) diff --git a/tests/profiling_v2/test_gunicorn.py b/tests/profiling_v2/test_gunicorn.py deleted file mode 100644 index 78297c85e55..00000000000 --- a/tests/profiling_v2/test_gunicorn.py +++ /dev/null @@ -1,140 +0,0 @@ -# -*- encoding: utf-8 -*- -import os -import re -import subprocess -import sys -import time -import urllib.request - -import pytest - -from tests.profiling.collector import pprof_utils - - -# DEV: gunicorn tests are hard to debug, so keeping these print statements for -# future debugging -DEBUG_PRINT = True - - -def debug_print(*args): - if DEBUG_PRINT: - print(*args) - - -# gunicorn is not available on Windows -if sys.platform == "win32": - pytestmark = pytest.mark.skip - -TESTING_GEVENT = os.getenv("DD_PROFILE_TEST_GEVENT", False) - - -def _run_gunicorn(*args): - cmd = ( - [ - "ddtrace-run", - "gunicorn", - "--bind", - "127.0.0.1:7644", - "--worker-tmp-dir", - "/dev/shm", - "-c", - os.path.dirname(__file__) + "/gunicorn.conf.py", - "--chdir", - os.path.dirname(__file__), - ] - + list(args) - + ["tests.profiling.gunicorn-app:app"] - ) - return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - - -@pytest.fixture -def gunicorn(monkeypatch): - monkeypatch.setenv("DD_PROFILING_IGNORE_PROFILER", "1") - monkeypatch.setenv("DD_PROFILING_ENABLED", "1") - - yield _run_gunicorn - - -def _get_worker_pids(stdout): - # type: (str) -> list[int] - return [int(_) for _ in re.findall(r"Booting worker with pid: (\d+)", stdout)] - - -def _test_gunicorn(gunicorn, tmp_path, monkeypatch, *args): - # type: (...) 
-> None - filename = str(tmp_path / "gunicorn.pprof") - monkeypatch.setenv("DD_PROFILING_OUTPUT_PPROF", filename) - monkeypatch.setenv("_DD_PROFILING_STACK_V2_ADAPTIVE_SAMPLING_ENABLED", "0") - - debug_print("Creating gunicorn workers") - # DEV: We only start 1 worker to simplify the test - proc = gunicorn("-w", "1", *args) - # Wait for the workers to start - time.sleep(5) - - if proc.poll() is not None: - pytest.fail("Gunicorn failed to start") - - debug_print("Making request to gunicorn server") - try: - with urllib.request.urlopen("http://127.0.0.1:7644", timeout=5) as f: - status_code = f.getcode() - assert status_code == 200, status_code - response = f.read().decode() - debug_print(response) - except Exception as e: - proc.terminate() - output = proc.stdout.read().decode() - print(output) - pytest.fail("Failed to make request to gunicorn server %s" % e) - finally: - # Need to terminate the process to get the output and release the port - proc.terminate() - - debug_print("Reading gunicorn worker output to get PIDs") - output = proc.stdout.read().decode() - worker_pids = _get_worker_pids(output) - debug_print("Gunicorn worker PIDs: %s" % worker_pids) - - for line in output.splitlines(): - debug_print(line) - - assert len(worker_pids) == 1, output - - debug_print("Waiting for gunicorn process to terminate") - try: - assert proc.wait(timeout=5) == 0, output - except subprocess.TimeoutExpired: - pytest.fail("Failed to terminate gunicorn process ", output) - assert "module 'threading' has no attribute '_active'" not in output, output - - for pid in worker_pids: - debug_print("Reading pprof file with prefix %s.%d" % (filename, pid)) - profile = pprof_utils.parse_newest_profile("%s.%d" % (filename, pid)) - # This returns a list of samples that have non-zero cpu-time - samples = pprof_utils.get_samples_with_value_type(profile, "cpu-time") - assert len(samples) > 0 - - # DEV: somehow the filename is reported as either __init__.py or gunicorn-app.py - # when run on GitLab CI. We need to match either of these two. - filename_regex = r"^(?:__init__\.py|gunicorn-app\.py)$" - - expected_location = pprof_utils.StackLocation(function_name="fib", filename=filename_regex, line_no=8) - - pprof_utils.assert_profile_has_sample( - profile, - samples=samples, - # DEV: we expect multiple locations as fibonacci is recursive - expected_sample=pprof_utils.StackEvent(locations=[expected_location, expected_location]), - ) - - -@pytest.mark.skipif( - sys.version_info[:2] == (3, 8) and os.environ.get("DD_PROFILE_TEST_GEVENT") == "1", - reason="Flaky and fails often on Python 3.8 with DD_PROFILE_TEST_GEVENT=1", -) -def test_gunicorn(gunicorn, tmp_path, monkeypatch): - # type: (...) 
-> None - args = ("-k", "gevent") if TESTING_GEVENT else tuple() - _test_gunicorn(gunicorn, tmp_path, monkeypatch, *args) diff --git a/tests/profiling_v2/test_main.py b/tests/profiling_v2/test_main.py deleted file mode 100644 index 6920cb1d3b6..00000000000 --- a/tests/profiling_v2/test_main.py +++ /dev/null @@ -1,205 +0,0 @@ -# -*- encoding: utf-8 -*- -import multiprocessing -import os -import sys - -import pytest - -from tests.profiling.collector import lock_utils -from tests.profiling.collector import pprof_utils -from tests.utils import call_program - - -def test_call_script(): - env = os.environ.copy() - env["DD_PROFILING_ENABLED"] = "1" - stdout, stderr, exitcode, _ = call_program( - "ddtrace-run", sys.executable, os.path.join(os.path.dirname(__file__), "simple_program.py"), env=env - ) - if sys.platform == "win32": - assert exitcode == 0, (stdout, stderr) - else: - assert exitcode == 42, (stdout, stderr) - hello, pid = list(s.strip() for s in stdout.decode().strip().split("\n")) - assert hello == "hello world", stdout.decode().strip() - - -@pytest.mark.skipif(not os.getenv("DD_PROFILE_TEST_GEVENT", False), reason="Not testing gevent") -def test_call_script_gevent(): - if sys.version_info[:2] == (3, 8): - pytest.skip("this test is flaky on 3.8 with stack v2") - env = os.environ.copy() - env["DD_PROFILING_ENABLED"] = "1" - stdout, stderr, exitcode, pid = call_program( - sys.executable, os.path.join(os.path.dirname(__file__), "simple_program_gevent.py"), env=env - ) - assert exitcode == 0, (stdout, stderr) - - -def test_call_script_pprof_output(tmp_path): - """This checks if the pprof output and atexit register work correctly. - - The script does not run for one minute, so if the `stop_on_exit` flag is broken, this test will fail. - """ - filename = str(tmp_path / "pprof") - env = os.environ.copy() - env["DD_PROFILING_OUTPUT_PPROF"] = filename - env["DD_PROFILING_CAPTURE_PCT"] = "1" - env["DD_PROFILING_ENABLED"] = "1" - stdout, stderr, exitcode, _ = call_program( - "ddtrace-run", - sys.executable, - os.path.join(os.path.dirname(__file__), "../profiling", "simple_program.py"), - env=env, - ) - if sys.platform == "win32": - assert exitcode == 0, (stdout, stderr) - else: - assert exitcode == 42, (stdout, stderr) - _, pid = list(s.strip() for s in stdout.decode().strip().split("\n")) - profile = pprof_utils.parse_newest_profile(filename + "." + str(pid)) - samples = pprof_utils.get_samples_with_value_type(profile, "cpu-time") - assert len(samples) > 0 - - -@pytest.mark.skipif(sys.platform == "win32", reason="fork only available on Unix") -def test_fork(tmp_path): - filename = str(tmp_path / "pprof") - env = os.environ.copy() - env["DD_PROFILING_OUTPUT_PPROF"] = filename - env["DD_PROFILING_CAPTURE_PCT"] = "100" - stdout, stderr, exitcode, pid = call_program( - "python", os.path.join(os.path.dirname(__file__), "simple_program_fork.py"), env=env - ) - assert exitcode == 0 - child_pid = stdout.decode().strip() - profile = pprof_utils.parse_newest_profile(filename + "." 
+ str(pid)) - parent_expected_acquire_events = [ - pprof_utils.LockAcquireEvent( - caller_name="", - filename="simple_program_fork.py", - linenos=lock_utils.LineNo(create=11, acquire=12, release=28), - lock_name="lock", - ), - ] - parent_expected_release_events = [ - pprof_utils.LockReleaseEvent( - caller_name="", - filename="simple_program_fork.py", - linenos=lock_utils.LineNo(create=11, acquire=12, release=28), - lock_name="lock", - ), - ] - pprof_utils.assert_lock_events( - profile, - expected_acquire_events=parent_expected_acquire_events, - expected_release_events=parent_expected_release_events, - ) - child_profile = pprof_utils.parse_newest_profile(filename + "." + str(child_pid)) - # We expect the child profile to not have lock events from the parent process - # Note that assert_lock_events function only checks that the given events - # exists, and doesn't assert that other events don't exist. - with pytest.raises(AssertionError): - pprof_utils.assert_lock_events( - child_profile, - expected_acquire_events=parent_expected_acquire_events, - expected_release_events=parent_expected_release_events, - ) - pprof_utils.assert_lock_events( - child_profile, - expected_acquire_events=[ - # After fork(), we clear the samples in child, so we only have one - # lock acquire event - pprof_utils.LockAcquireEvent( - caller_name="", - filename="simple_program_fork.py", - linenos=lock_utils.LineNo(create=24, acquire=25, release=26), - lock_name="lock", - ), - ], - expected_release_events=[ - pprof_utils.LockReleaseEvent( - caller_name="", - filename="simple_program_fork.py", - linenos=lock_utils.LineNo(create=11, acquire=12, release=21), - lock_name="lock", - ), - pprof_utils.LockReleaseEvent( - caller_name="", - filename="simple_program_fork.py", - linenos=lock_utils.LineNo(create=24, acquire=25, release=26), - lock_name="lock", - ), - ], - ) - - -@pytest.mark.skipif(sys.platform == "win32", reason="fork only available on Unix") -@pytest.mark.skipif(not os.getenv("DD_PROFILE_TEST_GEVENT", False), reason="Not testing gevent") -def test_fork_gevent(): - env = os.environ.copy() - stdout, stderr, exitcode, pid = call_program( - "python", os.path.join(os.path.dirname(__file__), "../profiling", "gevent_fork.py"), env=env - ) - assert exitcode == 0 - - -methods = multiprocessing.get_all_start_methods() - - -@pytest.mark.parametrize( - "method", - set(methods) - {"forkserver", "fork"}, -) -def test_multiprocessing(method, tmp_path): - filename = str(tmp_path / "pprof") - env = os.environ.copy() - env["DD_PROFILING_OUTPUT_PPROF"] = filename - env["DD_PROFILING_ENABLED"] = "1" - env["DD_PROFILING_CAPTURE_PCT"] = "1" - stdout, stderr, exitcode, _ = call_program( - "ddtrace-run", - sys.executable, - os.path.join(os.path.dirname(__file__), "../profiling", "_test_multiprocessing.py"), - method, - env=env, - ) - assert exitcode == 0, (stdout, stderr) - pid, child_pid = list(s.strip() for s in stdout.decode().strip().split("\n")) - profile = pprof_utils.parse_newest_profile(filename + "." + str(pid)) - samples = pprof_utils.get_samples_with_value_type(profile, "cpu-time") - assert len(samples) > 0 - child_profile = pprof_utils.parse_newest_profile(filename + "." 
+ str(child_pid)) - child_samples = pprof_utils.get_samples_with_value_type(child_profile, "cpu-time") - assert len(child_samples) > 0 - - -@pytest.mark.subprocess( - ddtrace_run=True, - env=dict(DD_PROFILING_ENABLED="1"), - err=lambda _: "RuntimeError: the memalloc module is already started" not in _, -) -def test_memalloc_no_init_error_on_fork(): - import os - - pid = os.fork() - if not pid: - exit(0) - os.waitpid(pid, 0) - - -@pytest.mark.subprocess( - ddtrace_run=True, - env=dict( - DD_PROFILING_ENABLED="1", - DD_UNLOAD_MODULES_FROM_SITECUSTOMIZE="1", - ), - out="OK\n", - err=None, -) -def test_profiler_start_up_with_module_clean_up_in_protobuf_app(): - # This can cause segfaults if we do module clean up with later versions of - # protobuf. This is a regression test. - from google.protobuf import empty_pb2 # noqa:F401 - - print("OK") diff --git a/tests/profiling_v2/test_profiler.py b/tests/profiling_v2/test_profiler.py deleted file mode 100644 index 6ddf6c39ca6..00000000000 --- a/tests/profiling_v2/test_profiler.py +++ /dev/null @@ -1,332 +0,0 @@ -import logging -import sys -import time -from unittest import mock - -import pytest - -import ddtrace -from ddtrace.internal.compat import PYTHON_VERSION_INFO -from ddtrace.profiling import collector -from ddtrace.profiling import profiler -from ddtrace.profiling import scheduler -from ddtrace.profiling.collector import asyncio -from ddtrace.profiling.collector import stack -from ddtrace.profiling.collector import threading - - -def test_status(): - p = profiler.Profiler() - assert repr(p.status) == "<ServiceStatus.STOPPED: 'stopped'>" - p.start() - assert repr(p.status) == "<ServiceStatus.RUNNING: 'running'>" - p.stop(flush=False) - assert repr(p.status) == "<ServiceStatus.STOPPED: 'stopped'>" - - -def test_restart(): - p = profiler.Profiler() - p.start() - p.stop(flush=False) - p.start() - p.stop(flush=False) - - -def test_multiple_stop(): - """Check that the profiler can be stopped twice.""" - p = profiler.Profiler() - p.start() - p.stop(flush=False) - p.stop(flush=False) - - -def test_tracer_api(monkeypatch): - monkeypatch.setenv("DD_API_KEY", "foobar") - prof = profiler.Profiler(tracer=ddtrace.tracer) - assert prof.tracer == ddtrace.tracer - for col in prof._profiler._collectors: - if isinstance(col, stack.StackCollector): - assert col.tracer == ddtrace.tracer - break - else: - pytest.fail("Unable to find stack collector") - - -@pytest.mark.subprocess() -def test_default_memory(): - from ddtrace.profiling import profiler - from ddtrace.profiling.collector import memalloc - - assert any(isinstance(col, memalloc.MemoryCollector) for col in profiler.Profiler()._profiler._collectors) - - -@pytest.mark.subprocess(env=dict(DD_PROFILING_MEMORY_ENABLED="true")) -def test_enable_memory(): - from ddtrace.profiling import profiler - from ddtrace.profiling.collector import memalloc - - assert any(isinstance(col, memalloc.MemoryCollector) for col in profiler.Profiler()._profiler._collectors) - - -@pytest.mark.subprocess(env=dict(DD_PROFILING_MEMORY_ENABLED="false")) -def test_disable_memory(): - from ddtrace.profiling import profiler - from ddtrace.profiling.collector import memalloc - - assert all(not isinstance(col, memalloc.MemoryCollector) for col in profiler.Profiler()._profiler._collectors) - - -def test_copy(): - p = profiler._ProfilerInstance(env="123", version="dwq", service="foobar") - c = p.copy() - assert c == p - assert p.env == c.env - assert p.version == c.version - assert p.service == c.service - assert p.tracer == c.tracer - assert p.tags == c.tags - - -def test_failed_start_collector(caplog, monkeypatch): - class
ErrCollect(collector.Collector): - def _start_service(self): - raise RuntimeError("could not import required module") - - def _stop_service(self): - pass - - @staticmethod - def collect(): - pass - - @staticmethod - def snapshot(): - raise Exception("error!") - - monkeypatch.setenv("DD_PROFILING_UPLOAD_INTERVAL", "1") - - class TestProfiler(profiler._ProfilerInstance): - def _build_default_exporters(self, *args, **kargs): - return None - - p = TestProfiler() - err_collector = mock.MagicMock(wraps=ErrCollect()) - p._collectors = [err_collector] - p.start() - - def profiling_tuples(tuples): - return [t for t in tuples if t[0].startswith("ddtrace.profiling")] - - assert profiling_tuples(caplog.record_tuples) == [ - ("ddtrace.profiling.profiler", logging.ERROR, "Failed to start collector %r, disabling." % err_collector) - ] - time.sleep(2) - p.stop() - assert err_collector.snapshot.call_count == 0 - assert profiling_tuples(caplog.record_tuples) == [ - ("ddtrace.profiling.profiler", logging.ERROR, "Failed to start collector %r, disabling." % err_collector) - ] - - -def test_default_collectors(): - p = profiler.Profiler() - assert any(isinstance(c, stack.StackCollector) for c in p._profiler._collectors) - assert any(isinstance(c, threading.ThreadingLockCollector) for c in p._profiler._collectors) - try: - import asyncio as _ # noqa: F401 - except ImportError: - pass - else: - assert any(isinstance(c, asyncio.AsyncioLockCollector) for c in p._profiler._collectors) - p.stop(flush=False) - - -def test_profiler_serverless(monkeypatch): - # type: (...) -> None - monkeypatch.setenv("AWS_LAMBDA_FUNCTION_NAME", "foobar") - p = profiler.Profiler() - assert isinstance(p._scheduler, scheduler.ServerlessScheduler) - assert p.tags["functionname"] == "foobar" - - -@pytest.mark.skipif(PYTHON_VERSION_INFO < (3, 10), reason="ddtrace under Python 3.9 is deprecated") -@pytest.mark.subprocess() -def test_profiler_ddtrace_deprecation(): - """ - ddtrace interfaces loaded by the profiler can be marked deprecated, and we should update - them when this happens. As reported by https://github.com/DataDog/dd-trace-py/issues/8881 - """ - import warnings - - with warnings.catch_warnings(): - warnings.simplefilter("error", DeprecationWarning) - from ddtrace.profiling import _threading # noqa:F401 - from ddtrace.profiling import event # noqa:F401 - from ddtrace.profiling import profiler # noqa:F401 - from ddtrace.profiling import scheduler # noqa:F401 - from ddtrace.profiling.collector import _lock # noqa:F401 - from ddtrace.profiling.collector import _task # noqa:F401 - from ddtrace.profiling.collector import _traceback # noqa:F401 - from ddtrace.profiling.collector import memalloc # noqa:F401 - from ddtrace.profiling.collector import stack # noqa:F401 - - -@pytest.mark.subprocess( - env=dict(DD_PROFILING_ENABLED="true"), - err="Failed to load ddup module (mock failure message), disabling profiling\n", -) -def test_libdd_failure_telemetry_logging(): - """Test that libdd initialization failures log to telemetry. This mimics - one of the two scenarios where profiling can be configured. 
- 1) using ddtrace-run with DD_PROFILING_ENABLED=true - 2) import ddtrace.profiling.auto - """ - - from unittest import mock - - with ( - mock.patch.multiple( - "ddtrace.internal.datadog.profiling.ddup", - failure_msg="mock failure message", - is_available=False, - ), - mock.patch("ddtrace.internal.telemetry.telemetry_writer.add_log") as mock_add_log, - ): - from ddtrace.internal.settings.profiling import config # noqa:F401 - from ddtrace.internal.telemetry.constants import TELEMETRY_LOG_LEVEL - - mock_add_log.assert_called_once() - call_args = mock_add_log.call_args - assert call_args[0][0] == TELEMETRY_LOG_LEVEL.ERROR - message = call_args[0][1] - assert "Failed to load ddup module" in message - assert "mock failure message" in message - - -@pytest.mark.subprocess( - # We'd like to check the stderr, but it somehow leads to triggering the - # upload code path on macOS - err=None -) -def test_libdd_failure_telemetry_logging_with_auto(): - from unittest import mock - - with ( - mock.patch.multiple( - "ddtrace.internal.datadog.profiling.ddup", - failure_msg="mock failure message", - is_available=False, - ), - mock.patch("ddtrace.internal.telemetry.telemetry_writer.add_log") as mock_add_log, - ): - from ddtrace.internal.telemetry.constants import TELEMETRY_LOG_LEVEL - import ddtrace.profiling.auto # noqa: F401 - - mock_add_log.assert_called_once() - call_args = mock_add_log.call_args - assert call_args[0][0] == TELEMETRY_LOG_LEVEL.ERROR - message = call_args[0][1] - assert "Failed to load ddup module" in message - assert "mock failure message" in message - - -@pytest.mark.subprocess( - env=dict(DD_PROFILING_ENABLED="true"), - err="Failed to load stack_v2 module (mock failure message), falling back to v1 stack sampler\n", -) -def test_stack_v2_failure_telemetry_logging(): - # Test that stack_v2 initialization failures log to telemetry. This is - # mimicking the behavior of ddtrace-run, where the config is imported to - # determine if profiling/stack_v2 is enabled - - from unittest import mock - - with ( - mock.patch.multiple( - "ddtrace.internal.datadog.profiling.stack_v2", - failure_msg="mock failure message", - is_available=False, - ), - mock.patch("ddtrace.internal.telemetry.telemetry_writer.add_log") as mock_add_log, - ): - from ddtrace.internal.settings.profiling import config # noqa: F401 - from ddtrace.internal.telemetry.constants import TELEMETRY_LOG_LEVEL - - mock_add_log.assert_called_once() - call_args = mock_add_log.call_args - assert call_args[0][0] == TELEMETRY_LOG_LEVEL.ERROR - message = call_args[0][1] - assert "Failed to load stack_v2 module" in message - assert "mock failure message" in message - - -@pytest.mark.subprocess( - # We'd like to check the stderr, but it somehow leads to triggering the - # upload code path on macOS. 
- err=None, -) -def test_stack_v2_failure_telemetry_logging_with_auto(): - from unittest import mock - - with ( - mock.patch.multiple( - "ddtrace.internal.datadog.profiling.stack_v2", - failure_msg="mock failure message", - is_available=False, - ), - mock.patch("ddtrace.internal.telemetry.telemetry_writer.add_log") as mock_add_log, - ): - from ddtrace.internal.telemetry.constants import TELEMETRY_LOG_LEVEL - import ddtrace.profiling.auto # noqa: F401 - - mock_add_log.assert_called_once() - call_args = mock_add_log.call_args - assert call_args[0][0] == TELEMETRY_LOG_LEVEL.ERROR - message = call_args[0][1] - assert "Failed to load stack_v2 module" in message - assert "mock failure message" in message - - -@pytest.mark.skipif(not sys.platform.startswith("linux"), reason="only works on linux") -@pytest.mark.subprocess(err=None) -# For macOS: Could print 'Error uploading' but okay to ignore since we are checking if native_id is set -def test_user_threads_have_native_id(): - from os import getpid - from threading import Thread - from threading import _MainThread # pyright: ignore[reportAttributeAccessIssue] - from threading import current_thread - from time import sleep - - from ddtrace.profiling import profiler - - # DEV: We used to run this test with ddtrace_run=True passed into the - # subprocess decorator, but that caused this to be flaky for Python 3.8.x - # with gevent. When it failed for that specific venv, current_thread() - # returned a DummyThread instead of a _MainThread. - p = profiler.Profiler() - p.start() - - main = current_thread() - assert isinstance(main, _MainThread) - # We expect the current thread to have the same ID as the PID - assert main.native_id == getpid(), (main.native_id, getpid()) - - t = Thread(target=lambda: None) - t.start() - - for _ in range(10): - try: - # The TID should be higher than the PID, but not too high - assert 0 < t.native_id - getpid() < 100, (t.native_id, getpid()) # pyright: ignore[reportOptionalOperand] - except AttributeError: - # The native_id attribute is set by the thread so we might have to - # wait a bit for it to be set. 
- sleep(0.1) - else: - break - else: - raise AssertionError("Thread.native_id not set") - - t.join() - - p.stop() diff --git a/tests/profiling_v2/test_scheduler.py b/tests/profiling_v2/test_scheduler.py deleted file mode 100644 index 323629d1c39..00000000000 --- a/tests/profiling_v2/test_scheduler.py +++ /dev/null @@ -1,52 +0,0 @@ -# -*- encoding: utf-8 -*- -import logging -import time -from unittest import mock - -from ddtrace.profiling import scheduler - - -def test_thread_name(): - s = scheduler.Scheduler() - s.start() - assert s._worker is not None - assert s._worker.name == "ddtrace.profiling.scheduler:Scheduler" - s.stop() - - -def test_before_flush(): - x = {} - - def call_me(): - x["OK"] = True - - s = scheduler.Scheduler(before_flush=call_me) - s.flush() - assert x["OK"] - - -def test_before_flush_failure(caplog): - def call_me(): - raise Exception("LOL") - - s = scheduler.Scheduler(before_flush=call_me) - s.flush() - assert caplog.record_tuples == [ - (("ddtrace.profiling.scheduler", logging.ERROR, "Scheduler before_flush hook failed")) - ] - - -@mock.patch("ddtrace.profiling.scheduler.Scheduler.periodic") -def test_serverless_periodic(mock_periodic): - s = scheduler.ServerlessScheduler() - # Fake start() - s._last_export = time.time_ns() - s.periodic() - assert s._profiled_intervals == 1 - mock_periodic.assert_not_called() - s._last_export = time.time_ns() - 65 - s._profiled_intervals = 65 - s.periodic() - assert s._profiled_intervals == 0 - assert s.interval == 1 - mock_periodic.assert_called() diff --git a/tests/profiling_v2/test_uwsgi.py b/tests/profiling_v2/test_uwsgi.py deleted file mode 100644 index 6b5d4e7cf23..00000000000 --- a/tests/profiling_v2/test_uwsgi.py +++ /dev/null @@ -1,228 +0,0 @@ -from importlib.metadata import version -import os -import re -import signal -from subprocess import TimeoutExpired -import sys -import time - -import pytest - -from tests.contrib.uwsgi import run_uwsgi -from tests.profiling.collector import pprof_utils - - -# uwsgi is not available on Windows -if sys.platform == "win32": - pytestmark = pytest.mark.skip - -TESTING_GEVENT = os.getenv("DD_PROFILE_TEST_GEVENT", False) -THREADS_MSG = ( - b"ddtrace.internal.uwsgi.uWSGIConfigError: enable-threads option must be set to true, or a positive " - b"number of threads must be set" -) - -uwsgi_app = os.path.join(os.path.dirname(__file__), "..", "profiling", "uwsgi-app.py") - - -@pytest.fixture -def uwsgi(monkeypatch, tmp_path): - # Do not ignore profiler so we have samples in the output pprof - monkeypatch.setenv("DD_PROFILING_IGNORE_PROFILER", "0") - # Do not use pytest tmpdir fixtures which generate directories longer than allowed for a socket file name - socket_name = str(tmp_path / "uwsgi.sock") - import os - - cmd = [ - "uwsgi", - "--need-app", - "--die-on-term", - "--socket", - socket_name, - "--wsgi-file", - uwsgi_app, - ] - - try: - yield run_uwsgi(cmd) - finally: - os.unlink(socket_name) - - -def test_uwsgi_threads_disabled(uwsgi): - proc = uwsgi() - stdout, _ = proc.communicate() - assert proc.wait() != 0 - assert THREADS_MSG in stdout - - -def test_uwsgi_threads_number_set(uwsgi): - proc = uwsgi("--threads", "1") - try: - stdout, _ = proc.communicate(timeout=1) - except TimeoutExpired: - proc.terminate() - stdout, _ = proc.communicate() - assert THREADS_MSG not in stdout - - -def test_uwsgi_threads_enabled(uwsgi, tmp_path, monkeypatch): - filename = str(tmp_path / "uwsgi.pprof") - monkeypatch.setenv("DD_PROFILING_OUTPUT_PPROF", filename) - proc = uwsgi("--enable-threads") - worker_pids 
= _get_worker_pids(proc.stdout, 1) - # Give some time to the process to actually startup - time.sleep(3) - proc.terminate() - assert proc.wait() == 30 - for pid in worker_pids: - profile = pprof_utils.parse_newest_profile("%s.%d" % (filename, pid)) - samples = pprof_utils.get_samples_with_value_type(profile, "wall-time") - assert len(samples) > 0 - - -def test_uwsgi_threads_processes_no_primary(uwsgi, monkeypatch): - proc = uwsgi("--enable-threads", "--processes", "2") - stdout, _ = proc.communicate() - assert ( - b"ddtrace.internal.uwsgi.uWSGIConfigError: master option must be enabled when multiple processes are used" - in stdout - ) - - -def _get_worker_pids(stdout, num_worker, num_app_started=1): - worker_pids = [] - started = 0 - while True: - line = stdout.readline() - if line == b"": - break - elif b"WSGI app 0 (mountpoint='') ready" in line: - started += 1 - else: - m = re.match(r"^spawned uWSGI worker \d+ .*\(pid: (\d+),", line.decode()) - if m: - worker_pids.append(int(m.group(1))) - - if len(worker_pids) == num_worker and num_app_started == started: - break - - return worker_pids - - -def test_uwsgi_threads_processes_primary(uwsgi, tmp_path, monkeypatch): - filename = str(tmp_path / "uwsgi.pprof") - monkeypatch.setenv("DD_PROFILING_OUTPUT_PPROF", filename) - proc = uwsgi("--enable-threads", "--master", "--py-call-uwsgi-fork-hooks", "--processes", "2") - worker_pids = _get_worker_pids(proc.stdout, 2) - # Give some time to child to actually startup - time.sleep(3) - proc.terminate() - assert proc.wait() == 0 - for pid in worker_pids: - profile = pprof_utils.parse_newest_profile("%s.%d" % (filename, pid)) - samples = pprof_utils.get_samples_with_value_type(profile, "wall-time") - assert len(samples) > 0 - - -def test_uwsgi_threads_processes_primary_lazy_apps(uwsgi, tmp_path, monkeypatch): - filename = str(tmp_path / "uwsgi.pprof") - monkeypatch.setenv("DD_PROFILING_OUTPUT_PPROF", filename) - monkeypatch.setenv("DD_PROFILING_UPLOAD_INTERVAL", "1") - # For uwsgi<2.0.30, --skip-atexit is required to avoid crashes when - # the child process exits. - proc = uwsgi("--enable-threads", "--master", "--processes", "2", "--lazy-apps", "--skip-atexit") - worker_pids = _get_worker_pids(proc.stdout, 2, 2) - # Give some time to child to actually startup and output a profile - time.sleep(3) - proc.terminate() - assert proc.wait() == 0 - for pid in worker_pids: - profile = pprof_utils.parse_newest_profile("%s.%d" % (filename, pid)) - samples = pprof_utils.get_samples_with_value_type(profile, "wall-time") - assert len(samples) > 0 - - -def test_uwsgi_threads_processes_no_primary_lazy_apps(uwsgi, tmp_path, monkeypatch): - filename = str(tmp_path / "uwsgi.pprof") - monkeypatch.setenv("DD_PROFILING_OUTPUT_PPROF", filename) - monkeypatch.setenv("DD_PROFILING_UPLOAD_INTERVAL", "1") - # For uwsgi<2.0.30, --skip-atexit is required to avoid crashes when - # the child process exits. 
- proc = uwsgi("--enable-threads", "--processes", "2", "--lazy-apps", "--skip-atexit") - worker_pids = _get_worker_pids(proc.stdout, 2, 2) - assert len(worker_pids) == 2 - - # Give some time to child to actually startup and output a profile - time.sleep(3) - - # Kill master process - parent_pid: int = worker_pids[0] - os.kill(parent_pid, signal.SIGTERM) - - # Wait for master to exit - res_pid, res_status = os.waitpid(parent_pid, 0) - print("") - print(f"INFO: Master process {parent_pid} exited with status {res_status} and pid {res_pid}") - - # Attempt to kill worker proc once - worker_pid: int = worker_pids[1] - print(f"DEBUG: Checking worker {worker_pid} status after master exit:") - try: - os.kill(worker_pid, 0) - print(f"WARNING: Worker {worker_pid} is a zombie (will be cleaned up by init).") - - os.kill(worker_pid, signal.SIGKILL) - print(f"WARNING: Worker {worker_pid} could not be killed with SIGKILL (will be cleaned up by init).") - except OSError: - print(f"INFO: Worker {worker_pid} was successfully killed.") - - for pid in worker_pids: - profile = pprof_utils.parse_newest_profile("%s.%d" % (filename, pid)) - samples = pprof_utils.get_samples_with_value_type(profile, "wall-time") - assert len(samples) > 0 - - -@pytest.mark.parametrize("lazy_flag", ["--lazy-apps", "--lazy"]) -@pytest.mark.skipif( - tuple(int(x) for x in version("uwsgi").split(".")) >= (2, 0, 30), - reason="uwsgi>=2.0.30 does not require --skip-atexit", -) -def test_uwsgi_require_skip_atexit_when_lazy_with_master(uwsgi, lazy_flag): - expected_warning = b"ddtrace.internal.uwsgi.uWSGIConfigDeprecationWarning: skip-atexit option must be set" - - proc = uwsgi("--enable-threads", "--master", "--processes", "2", lazy_flag) - time.sleep(1) - proc.terminate() - stdout, _ = proc.communicate() - assert expected_warning in stdout - - -@pytest.mark.parametrize("lazy_flag", ["--lazy-apps", "--lazy"]) -@pytest.mark.skipif( - tuple(int(x) for x in version("uwsgi").split(".")) >= (2, 0, 30), - reason="uwsgi>=2.0.30 does not require --skip-atexit", -) -def test_uwsgi_require_skip_atexit_when_lazy_without_master(uwsgi, lazy_flag): - expected_warning = b"ddtrace.internal.uwsgi.uWSGIConfigDeprecationWarning: skip-atexit option must be set" - num_workers = 2 - proc = uwsgi("--enable-threads", "--processes", str(num_workers), lazy_flag) - - worker_pids = [] - logged_warning = 0 - while True: - line = proc.stdout.readline() - if line == b"": - break - if expected_warning in line: - logged_warning += 1 - else: - m = re.match(r"^spawned uWSGI worker \d+ .*\(pid: (\d+),", line.decode()) - if m: - worker_pids.append(int(m.group(1))) - - if logged_warning == num_workers: - break - - for pid in worker_pids: - os.kill(pid, signal.SIGTERM)
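Reviewer note (not part of the patch): the tests moved in this change all share one verification idiom — run a workload with DD_PROFILING_OUTPUT_PPROF set, locate the resulting "<prefix>.<pid>" file, and assert that expected samples exist via pprof_utils. A minimal standalone sketch of that idiom follows, assuming only the helpers already visible in the hunks above (parse_newest_profile, get_samples_with_value_type); the output path and the CPU-burning workload are illustrative, and in the real tests the environment is prepared via @pytest.mark.subprocess so that the variable is set before ddtrace initializes.

import os

# Illustrative output prefix; the profiler appends ".<pid>" on export.
# Set it before importing ddtrace so the profiling config picks it up.
os.environ["DD_PROFILING_OUTPUT_PPROF"] = "/tmp/example.pprof"

from ddtrace.profiling import profiler
from tests.profiling.collector import pprof_utils

p = profiler.Profiler()
p.start()
sum(i * i for i in range(1_000_000))  # burn some CPU so the sampler records frames
p.stop()  # stop() flushes the profile to /tmp/example.pprof.<pid>

profile = pprof_utils.parse_newest_profile("/tmp/example.pprof." + str(os.getpid()))
samples = pprof_utils.get_samples_with_value_type(profile, "cpu-time")
assert len(samples) > 0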