From 121755d4eb128a1294918ae8569d75b6005efd6d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 8 May 2023 12:46:21 +0300 Subject: [PATCH 01/21] lockdep: Swap storage for pin_count and references As a lockmap takes a reference for every ww_mutex used together, this can be an arbitrarily large number and under control of userspace -- easily overflowing the arbitrary limit of 4096. However, the pin_count (used for detecting unexpected lock dropping) is a full 32b despite nesting being extremely rare (see lockdep_pin_lock). References: https://gitlab.freedesktop.org/drm/intel/-/issues/8028 Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190425092004.9995-33-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi [Joonas: Converting to pin_count:11 as per addition of sync:1] Signed-off-by: Joonas Lahtinen Signed-off-by: Rodrigo Vivi --- include/linux/lockdep_types.h | 4 ++-- kernel/locking/lockdep.c | 11 +++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h index 9f361d3ab9d95d..cb943f4b238569 100644 --- a/include/linux/lockdep_types.h +++ b/include/linux/lockdep_types.h @@ -252,8 +252,8 @@ struct held_lock { unsigned int check:1; /* see lock_acquire() comment */ unsigned int hardirqs_off:1; unsigned int sync:1; - unsigned int references:11; /* 32 bits */ - unsigned int pin_count; + unsigned int pin_count:11; /* 32 bits */ + unsigned int references; }; #else /* !CONFIG_LOCKDEP */ diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 58d78a33ac65bf..eeb8320e1a627d 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -5621,11 +5621,14 @@ static struct pin_cookie __lock_pin_lock(struct lockdep_map *lock) if (match_held_lock(hlock, lock)) { /* - * Grab 16bits of randomness; this is sufficient to not - * be guessable and still allows some pin nesting in - * our u32 pin_count. 
+ * Grab 6bits of randomness; this is barely sufficient + * to not be guessable and still allows some 32 levels + * of pin nesting in our u11 pin_count. */ - cookie.val = 1 + (sched_clock() & 0xffff); + cookie.val = 1 + (sched_clock() & 0x3f); + if (DEBUG_LOCKS_WARN_ON(hlock->pin_count + cookie.val >= 1 << 11)) + return NIL_COOKIE; + hlock->pin_count += cookie.val; return cookie; } From 00b6ad8a555a535d046af97254dda8b5e6fe4c20 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Nov 2017 12:57:06 +0000 Subject: [PATCH 02/21] ftrace: Allow configuring global trace buffer size (for dump-on-oops) We have recently turned on ftrace-dump-on-oops for i915's CI and an issue we have encountered is that the trace buffer size greatly exceeds the pstore capabilities; we get the tail of the oops but not the introduction. Currently the global buffer size is controllable on the cmdline, but at the request of our CI sysadmin, we would like to add a control to the Kconfig as well. The rationale being the cmdline carries the temporary hacks that we want to eradicate, and we want to track the permanent configuration in .config. I have kept the Kconfig option hidden from the user as the default should suffice for the majority of users; reserving the configuration for those that eschew the cmdline option. v2: Add an expert prompt to stop the default value overriding .config changes. 
References: https://gitlab.freedesktop.org/drm/intel/-/issues/8029 Signed-off-by: Chris Wilson Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Tomi Sarvela Cc: Joonas Lahtinen Cc: Daniel Vetter Signed-off-by: Rodrigo Vivi --- kernel/trace/Kconfig | 7 +++++++ kernel/trace/trace.c | 4 +--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index a3f35c7d83b6c5..012db82731e296 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -176,6 +176,13 @@ config TRACING select TRACE_CLOCK select NEED_TASKS_RCU +config GLOBAL_TRACE_BUF_SIZE + int + prompt "Global ftrace buffer size (for trace_printk)" if EXPERT + range 0 4194034 + default 1441792 # 16384 * 88 (sizeof(struct print_entry)) + depends on TRACING + config GENERIC_TRACER bool select TRACING diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b581e388a9d9f7..30fc05335b9222 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -888,9 +888,7 @@ int tracing_is_enabled(void) * to not have to wait for all that output. Anyway this can be * boot time and run time configurable. */ -#define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */ - -static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT; +static unsigned long trace_buf_size = CONFIG_GLOBAL_TRACE_BUF_SIZE; /* trace_types holds a link list of available tracers. */ static struct tracer *trace_types __read_mostly; From b387b7e2c4f39a9304d22ff3c38ae06e8880d0bb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 3 Sep 2018 14:17:45 +0100 Subject: [PATCH 03/21] kernel/panic: Show the stacktrace after additional notifier messages Most systems keep the last messages from the panic, and we value the stacktrace most, so dump it last in order to preserve it for post-mortems. 
References: https://gitlab.freedesktop.org/drm/intel/-/issues/8030 Signed-off-by: Chris Wilson Acked-by: Martin Peres Link: https://patchwork.freedesktop.org/patch/msgid/20180903131745.30593-1-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi --- kernel/panic.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/kernel/panic.c b/kernel/panic.c index a3889f38153d96..5b2bc2ac8f67ea 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -346,13 +346,6 @@ void panic(const char *fmt, ...) buf[len - 1] = '\0'; pr_emerg("Kernel panic - not syncing: %s\n", buf); -#ifdef CONFIG_DEBUG_BUGVERBOSE - /* - * Avoid nested stack-dumping if a panic occurs during oops processing - */ - if (!test_taint(TAINT_DIE) && oops_in_progress <= 1) - dump_stack(); -#endif /* * If kgdb is enabled, give it a chance to run before we stop all @@ -384,6 +377,14 @@ void panic(const char *fmt, ...) panic_print_sys_info(false); +#ifdef CONFIG_DEBUG_BUGVERBOSE + /* + * Avoid nested stack-dumping if a panic occurs during oops processing + */ + if (!test_taint(TAINT_DIE) && oops_in_progress <= 1) + dump_stack(); +#endif + kmsg_dump_desc(KMSG_DUMP_PANIC, buf); /* From 43cf688985bc6b59eb999f9b5b60cc49073d7546 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 9 Oct 2018 12:35:21 +0100 Subject: [PATCH 04/21] x86: Downgrade clock throttling thermal event critical error Under CI testing, it is common for the cpus to overheat with the continuous workloads and end up being throttled. As the cpus still function, it is less of a critical error meriting urgent action, but an expected yet significant condition (pr_notice). 
References: https://gitlab.freedesktop.org/drm/intel/-/issues/8031 Signed-off-by: Chris Wilson Cc: Petri Latvala Signed-off-by: Rodrigo Vivi --- drivers/thermal/intel/therm_throt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c index e69868e868eb9e..9d18f2fba7a009 100644 --- a/drivers/thermal/intel/therm_throt.c +++ b/drivers/thermal/intel/therm_throt.c @@ -345,10 +345,10 @@ static void __maybe_unused throttle_active_work(struct work_struct *work) avg /= ARRAY_SIZE(state->temp_samples); if (state->average > avg) { - pr_warn("CPU%d: %s temperature is above threshold, cpu clock is throttled (total events = %lu)\n", - this_cpu, - state->level == CORE_LEVEL ? "Core" : "Package", - state->count); + pr_notice("CPU%d: %s temperature is above threshold, cpu clock is throttled (total events = %lu)\n", + this_cpu, + state->level == CORE_LEVEL ? "Core" : "Package", + state->count); state->rate_control_active = true; } From 3f412047c54e28ecd50c10bdcec698f166c861e8 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 16 Nov 2021 09:22:48 +0100 Subject: [PATCH 05/21] libata: Downgrade unsupported feature warnings to notifications References: https://gitlab.freedesktop.org/drm/intel/-/issues/8032 Signed-off-by: Chris Wilson Cc: Petri Latvala [danvet: Rebase] Signed-off-by: Rodrigo Vivi --- drivers/ata/libata-core.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 773799cfd44308..14bed90d833676 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2178,7 +2178,7 @@ static bool ata_identify_page_supported(struct ata_device *dev, u8 page) * for drives which implement this ATA level or above. 
*/ if (ata_id_major_version(dev->id) >= 10) - ata_dev_warn(dev, + ata_dev_notice(dev, "ATA Identify Device Log not supported\n"); dev->quirks |= ATA_QUIRK_NO_ID_DEV_LOG; return false; @@ -2249,7 +2249,7 @@ static void ata_dev_config_ncq_send_recv(struct ata_device *dev) unsigned int err_mask; if (!ata_log_supported(dev, ATA_LOG_NCQ_SEND_RECV)) { - ata_dev_warn(dev, "NCQ Send/Recv Log not supported\n"); + ata_dev_notice(dev, "NCQ Send/Recv Log not supported\n"); return; } err_mask = ata_read_log_page(dev, ATA_LOG_NCQ_SEND_RECV, @@ -2273,8 +2273,8 @@ static void ata_dev_config_ncq_non_data(struct ata_device *dev) unsigned int err_mask; if (!ata_log_supported(dev, ATA_LOG_NCQ_NON_DATA)) { - ata_dev_warn(dev, - "NCQ Non-Data Log not supported\n"); + ata_dev_notice(dev, + "NCQ Non-Data Log not supported\n"); return; } err_mask = ata_read_log_page(dev, ATA_LOG_NCQ_NON_DATA, @@ -2937,14 +2937,14 @@ int ata_dev_configure(struct ata_device *dev) if (ata_id_is_cfa(id)) { /* CPRM may make this media unusable */ if (id[ATA_ID_CFA_KEY_MGMT] & 1) - ata_dev_warn(dev, + ata_dev_notice(dev, "supports DRM functions and may not be fully accessible\n"); snprintf(revbuf, 7, "CFA"); } else { snprintf(revbuf, 7, "ATA-%d", ata_id_major_version(id)); /* Warn the user if the device has TPM extensions */ if (ata_id_has_tpm(id)) - ata_dev_warn(dev, + ata_dev_notice(dev, "supports DRM functions and may not be fully accessible\n"); } @@ -3100,8 +3100,8 @@ int ata_dev_configure(struct ata_device *dev) } if ((dev->quirks & ATA_QUIRK_FIRMWARE_WARN) && print_info) { - ata_dev_warn(dev, "WARNING: device requires firmware update to be fully functional\n"); - ata_dev_warn(dev, " contact the vendor or visit http://ata.wiki.kernel.org\n"); + ata_dev_notice(dev, "WARNING: device requires firmware update to be fully functional\n"); + ata_dev_notice(dev, " contact the vendor or visit http://ata.wiki.kernel.org\n"); } return 0; From 117031f2fdc406ebee33afc84102e65fea3501c1 Mon Sep 17 00:00:00 2001 From: 
Daniel Vetter Date: Thu, 2 May 2019 22:46:48 +0200 Subject: [PATCH 06/21] RFC: hung_task: taint kernel There's the hung_task_panic sysctl, but that's a bit of an extreme measure. As a fallback taint at least the machine. Our CI uses this to decide when a reboot is necessary, plus to figure out whether the kernel is still happy. v2: Works much better when I put the else { add_taint() } at the right place. References: https://gitlab.freedesktop.org/drm/intel/-/issues/8034 Signed-off-by: Daniel Vetter Cc: Andrew Morton Cc: Tetsuo Handa Cc: Dmitry Vyukov Cc: "Paul E. McKenney" Cc: Valdis Kletnieks Cc: Daniel Vetter Cc: Vitaly Kuznetsov Cc: "Liu, Chuansheng" Acked-by: Chris Wilson (for core-for-CI) Link: https://patchwork.freedesktop.org/patch/msgid/20190502204648.5537-1-daniel.vetter@ffwll.ch Signed-off-by: Jani Nikula Signed-off-by: Rodrigo Vivi --- kernel/hung_task.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/hung_task.c b/kernel/hung_task.c index dc898ec93463f6..d8c5e7bc8826e6 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -169,6 +169,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) console_verbose(); hung_task_show_lock = true; hung_task_call_panic = true; + } else { + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); } /* From 6d4322c03d37c088ffe1149f40a6cff93183a0a5 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 2 May 2019 21:42:08 +0200 Subject: [PATCH 07/21] RFC: soft/hardlockup: taint kernel There are the soft/hardlockup_panic sysctls, but that's a bit of an extreme measure. As a fallback taint at least the machine. Our CI uses this to decide when a reboot is necessary, plus to figure out whether the kernel is still happy. 
References: https://gitlab.freedesktop.org/drm/intel/-/issues/8035 Signed-off-by: Daniel Vetter Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Valdis Kletnieks Cc: Laurence Oberman Cc: Vincent Whitchurch Cc: Don Zickus Cc: Andrew Morton Cc: Sergey Senozhatsky Cc: Sinan Kaya Cc: Daniel Vetter Acked-by: Chris Wilson (for core-for-CI) Link: https://patchwork.freedesktop.org/patch/msgid/20190502194208.3535-2-daniel.vetter@ffwll.ch Signed-off-by: Jani Nikula Signed-off-by: Rodrigo Vivi --- kernel/watchdog.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 9fa2af9dbf2cec..f1281e9d2bf36c 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -214,6 +214,8 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs) if (hardlockup_panic) nmi_panic(regs, "Hard LOCKUP"); + else + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); per_cpu(watchdog_hardlockup_warned, cpu) = true; } else { @@ -776,6 +778,8 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK); if (softlockup_panic) panic("softlockup: hung tasks"); + else + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); } return HRTIMER_RESTART; From 9d4e25c945ca0023f23d8be78a613aa822be3cdf Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 8 May 2023 12:53:35 +0300 Subject: [PATCH 08/21] net/sch_generic: Shut up noise We can't allow spam in CI. Update 26th June 2018: This is still an issue: Update 23rd May 2019: You guessed it, still occurring. 
[ 224.739686] ------------[ cut here ]------------ [ 224.739712] WARNING: CPU: 3 PID: 2982 at net/sched/sch_generic.c:461 dev_watchdog+0x1fd/0x210 [ 224.739714] Modules linked in: vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_pcm i915 asix usbnet mii mei_me mei prime_numbers i2c_hid pinctrl_sunrisepoint pinctrl_intel btusb btrtl btbcm btintel bluetooth ecdh_generic [ 224.739775] CPU: 3 PID: 2982 Comm: gem_exec_suspen Tainted: G U W 4.18.0-rc2-CI-Patchwork_9414+ #1 [ 224.739777] Hardware name: Dell Inc. XPS 13 9350/, BIOS 1.4.12 11/30/2016 [ 224.739780] RIP: 0010:dev_watchdog+0x1fd/0x210 [ 224.739781] Code: 49 63 4c 24 f0 eb 92 4c 89 ef c6 05 21 46 ad 00 01 e8 77 ee fc ff 89 d9 48 89 c2 4c 89 ee 48 c7 c7 88 4c 14 82 e8 a3 fe 84 ff <0f> 0b eb be 0f 1f 44 00 00 66 2e 0f 1f 84 00 00 00 00 00 48 c7 47 [ 224.739866] RSP: 0018:ffff88027dd83e40 EFLAGS: 00010286 [ 224.739869] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000102 [ 224.739871] RDX: 0000000080000102 RSI: ffffffff820c8c6c RDI: 00000000ffffffff [ 224.739873] RBP: ffff8802644c1540 R08: 0000000071be9b33 R09: 0000000000000000 [ 224.739874] R10: ffff88027dd83dc0 R11: 0000000000000000 R12: ffff8802644c1588 [ 224.739876] R13: ffff8802644c1160 R14: 0000000000000001 R15: ffff88026a5dc728 [ 224.739878] FS: 00007f18f4887980(0000) GS:ffff88027dd80000(0000) knlGS:0000000000000000 [ 224.739880] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 224.739881] CR2: 00007f4c627ae548 CR3: 000000022ca1a002 CR4: 00000000003606e0 [ 224.739883] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 224.739885] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 224.739886] Call Trace: [ 224.739888] [ 224.739892] ? qdisc_reset+0xe0/0xe0 [ 224.739894] ? 
qdisc_reset+0xe0/0xe0 [ 224.739897] call_timer_fn+0x93/0x360 [ 224.739903] expire_timers+0xc1/0x1d0 [ 224.739908] run_timer_softirq+0xc7/0x170 [ 224.739916] __do_softirq+0xd9/0x505 [ 224.739923] irq_exit+0xa9/0xc0 [ 224.739926] smp_apic_timer_interrupt+0x9c/0x2d0 [ 224.739929] apic_timer_interrupt+0xf/0x20 [ 224.739931] [ 224.739934] RIP: 0010:delay_tsc+0x2e/0xb0 [ 224.739936] Code: 49 89 fc 55 53 bf 01 00 00 00 e8 6d 2c 78 ff e8 88 9d b6 ff 41 89 c5 0f ae e8 0f 31 48 c1 e2 20 48 09 c2 48 89 d5 eb 16 f3 90 01 00 00 00 e8 48 2c 78 ff e8 63 9d b6 ff 44 39 e8 75 36 0f ae [ 224.740021] RSP: 0018:ffffc900002f7d48 EFLAGS: 00000286 ORIG_RAX: ffffffffffffff13 [ 224.740024] RAX: 0000000080000000 RBX: 0000000649565ca9 RCX: 0000000000000001 [ 224.740026] RDX: 0000000080000001 RSI: ffffffff820c8c6c RDI: 00000000ffffffff [ 224.740027] RBP: 00000006493ea9ce R08: 000000005e81e2ee R09: 0000000000000000 [ 224.740029] R10: 0000000000000120 R11: 0000000000000000 R12: 00000000002ad8d6 [ 224.740030] R13: 0000000000000003 R14: 0000000000000004 R15: ffff88025caf5408 [ 224.740040] ? delay_tsc+0x66/0xb0 [ 224.740045] hibernation_debug_sleep+0x1c/0x30 [ 224.740048] hibernation_snapshot+0x2c1/0x690 [ 224.740053] hibernate+0x142/0x2a4 [ 224.740057] state_store+0xd0/0xe0 [ 224.740063] kernfs_fop_write+0x104/0x190 [ 224.740068] __vfs_write+0x31/0x180 [ 224.740072] ? rcu_read_lock_sched_held+0x6f/0x80 [ 224.740075] ? rcu_sync_lockdep_assert+0x29/0x50 [ 224.740078] ? __sb_start_write+0x152/0x1f0 [ 224.740080] ? 
__sb_start_write+0x168/0x1f0 [ 224.740084] vfs_write+0xbd/0x1a0 [ 224.740088] ksys_write+0x50/0xc0 [ 224.740094] do_syscall_64+0x55/0x190 [ 224.740097] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 224.740099] RIP: 0033:0x7f18f400a281 [ 224.740100] Code: c3 0f 1f 84 00 00 00 00 00 48 8b 05 59 8d 20 00 c3 0f 1f 84 00 00 00 00 00 8b 05 8a d1 20 00 85 c0 75 16 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 57 f3 c3 0f 1f 44 00 00 41 54 55 49 89 d4 53 [ 224.740186] RSP: 002b:00007fffd1f4fec8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 224.740189] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f18f400a281 [ 224.740190] RDX: 0000000000000004 RSI: 00007f18f448069a RDI: 0000000000000006 [ 224.740192] RBP: 00007fffd1f4fef0 R08: 0000000000000000 R09: 0000000000000000 [ 224.740194] R10: 0000000000000000 R11: 0000000000000246 R12: 000055e795d03400 [ 224.740195] R13: 00007fffd1f50500 R14: 0000000000000000 R15: 0000000000000000 [ 224.740205] irq event stamp: 1582591 [ 224.740207] hardirqs last enabled at (1582590): [] vprintk_emit+0x4bc/0x4d0 [ 224.740210] hardirqs last disabled at (1582591): [] error_entry+0x7c/0x100 [ 224.740212] softirqs last enabled at (1582568): [] __do_softirq+0x34f/0x505 [ 224.740215] softirqs last disabled at (1582571): [] irq_exit+0xa9/0xc0 [ 224.740218] WARNING: CPU: 3 PID: 2982 at net/sched/sch_generic.c:461 dev_watchdog+0x1fd/0x210 [ 224.740219] ---[ end trace 6e41d690e611c338 ]--- References: https://gitlab.freedesktop.org/drm/intel/-/issues/8037 References: https://bugzilla.kernel.org/show_bug.cgi?id=196399 Acked-by: Martin Peres Cc: Martin Peres Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20170718082110.12524-1-daniel.vetter@ffwll.ch Signed-off-by: Rodrigo Vivi Signed-off-by: Joonas Lahtinen Signed-off-by: Rodrigo Vivi --- net/sched/sch_generic.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 514b1b6ac68196..a518fcdb96ba41 
100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -530,7 +530,12 @@ static void dev_watchdog(struct timer_list *t) oldest_start = trans_start; } - if (unlikely(timedout_ms)) { + /* The noise is pissing off our CI and upstream doesn't + * move on the bug report: + * + * https://bugzilla.kernel.org/show_bug.cgi?id=196399 + */ + if (unlikely(timedout_ms) && 0) { trace_net_dev_xmit_timeout(dev, i); netdev_crit(dev, "NETDEV WATCHDOG: CPU: %d: transmit queue %u timed out %u ms\n", raw_smp_processor_id(), From c36f2dc9f8c58946a054756de4a43754b09ebdc1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 1 Jul 2019 15:29:03 +0100 Subject: [PATCH 09/21] mm: Show slab debug as offsets from section base not hashed pointers Since the kernel now uses hashed pointers for raw addresses, it is very hard to gauge the relative placement within a section, and since the hash value will never match up with any contents, using it provides no information relevant for slab debugging. Show the relative offset into each section, so that some reference for the hexdump is provided. 
References: https://gitlab.freedesktop.org/drm/intel/-/issues/8038 Signed-off-by: Chris Wilson Signed-off-by: Rodrigo Vivi --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index b46f87662e71d4..bfd924674ad8d0 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -910,7 +910,7 @@ static void print_section(char *level, char *text, u8 *addr, unsigned int length) { metadata_access_enable(); - print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, + print_hex_dump(level, text, DUMP_PREFIX_OFFSET, 16, 1, kasan_reset_tag((void *)addr), length, 1); metadata_access_disable(); } From 320d83b9dcc4618faa31009a434e127e0262ec0e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Apr 2020 09:27:53 +0100 Subject: [PATCH 10/21] pci/msi: Stop warning for MSI enabling failure If the MSI is already enabled, trying to enable it again results in an -EINVAL and on the first attempt a WARN. That WARN causes our CI to abort the run [on each first attempt to suspend]: <4> [463.142025] WARNING: CPU: 0 PID: 2225 at drivers/pci/msi.c:1074 __pci_enable_msi_range+0x3cb/0x420 <4> [463.142026] Modules linked in: snd_hda_intel i915 snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic mei_hdcp x86_pkg_temp_thermal coretemp crct10dif_pclmul crc32_pclmul snd_intel_dspcfg ghash_clmulni_intel snd_hda_codec btusb btrtl btbcm btintel e1000e bluetooth snd_hwdep snd_hda_core ptp ecdh_generic snd_pcm ecc pps_core mei_me mei prime_numbers [last unloaded: i915] <4> [463.142045] CPU: 0 PID: 2225 Comm: kworker/u8:14 Tainted: G U 5.7.0-rc2-CI-CI_DRM_8350+ #1 <4> [463.142046] Hardware name: Intel Corporation NUC7i5BNH/NUC7i5BNB, BIOS BNKBL357.86A.0060.2017.1214.2013 12/14/2017 <4> [463.142049] Workqueue: events_unbound async_run_entry_fn <4> [463.142051] RIP: 0010:__pci_enable_msi_range+0x3cb/0x420 <4> [463.142053] Code: 76 58 49 8d 56 48 48 89 df e8 31 73 fd ff e9 20 fe ff ff 31 f6 48 89 df e8 c2 e9 fd ff e9 d6 fe ff ff 45 89 fc e9 1a ff ff ff <0f> 0b 41 bc ea ff 
ff ff e9 0d ff ff ff 41 bc ea ff ff ff e9 02 ff <4> [463.142054] RSP: 0018:ffffc90000593cd0 EFLAGS: 00010202 <4> [463.142056] RAX: 0000000000000010 RBX: ffff888274051000 RCX: 0000000000000000 <4> [463.142057] RDX: 0000000000000001 RSI: 0000000000000001 RDI: ffff888274051000 <4> [463.142058] RBP: ffff888238aa1018 R08: 0000000000000001 R09: 0000000000000001 <4> [463.142060] R10: ffffc90000593d90 R11: 00000000c79cdfd5 R12: ffff8882740510b0 <4> [463.142061] R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000001 <4> [463.142062] FS: 0000000000000000(0000) GS:ffff888276c00000(0000) knlGS:0000000000000000 <4> [463.142064] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4> [463.142065] CR2: 000055706f347d80 CR3: 0000000005610003 CR4: 00000000003606f0 <4> [463.142066] Call Trace: <4> [463.142073] pci_enable_msi+0x11/0x20 <4> [463.142077] azx_resume+0x1ab/0x200 [snd_hda_intel] <4> [463.142080] ? pci_pm_thaw+0x80/0x80 <4> [463.142084] dpm_run_callback+0x64/0x280 <4> [463.142089] device_resume+0xd4/0x1c0 <4> [463.142093] ? dpm_watchdog_set+0x60/0 While this would appear to be a bug in snd-hda, it does appear inconsequential, at least for gfx-ci. Downgrade the warning to an info, like the other already-enabled error for MSI-X. 
References: https://gitlab.freedesktop.org/drm/intel/-/issues/8041 Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1687 Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200423082753.3899018-1-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi --- drivers/pci/msi/msi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index 6569ba3577fe63..a11fb9c2e857f2 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -429,8 +429,10 @@ int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, if (maxvec < minvec) return -ERANGE; - if (WARN_ON_ONCE(dev->msi_enabled)) + if (dev->msi_enabled) { + pci_info(dev, "can't enable MSI, already enabled\n"); return -EINVAL; + } /* Test for the availability of MSI support */ if (!pci_msi_domain_supports(dev, 0, ALLOW_LEGACY)) From 016489e6a1d940bab49460ab70338deb0598cf0b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 17 Dec 2020 16:47:00 +0000 Subject: [PATCH 11/21] HAX net/phy: Suppress WARN for calling stop while halted References: https://gitlab.freedesktop.org/drm/intel/-/issues/8046 Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2805 Signed-off-by: Rodrigo Vivi --- drivers/net/phy/phy.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 13df28445f0201..75654b5a8858bb 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1626,11 +1626,8 @@ void phy_stop(struct phy_device *phydev) enum phy_state old_state; if (!phy_is_started(phydev) && phydev->state != PHY_DOWN && - phydev->state != PHY_ERROR) { - WARN(1, "called from state %s\n", - phy_state_to_str(phydev->state)); + phydev->state != PHY_ERROR) return; - } mutex_lock(&phydev->lock); old_state = phydev->state; From 2822b185a2a63046a9f9c5c2c4759afbed22c824 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Mon, 8 May 2023 13:09:13 +0300 Subject: 
[PATCH 12/21] HAX net/phy: Suppress WARN from phy_error References: https://gitlab.freedesktop.org/drm/intel/-/issues/8047 Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2874 Signed-off-by: Rodrigo Vivi Signed-off-by: Joonas Lahtinen Signed-off-by: Rodrigo Vivi --- drivers/net/phy/phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 75654b5a8858bb..2e2fcb83141b89 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1361,7 +1361,7 @@ static void phy_error_precise(struct phy_device *phydev, */ void phy_error(struct phy_device *phydev) { - WARN_ON(1); + pr_notice_once("%s\n", __func__); phy_process_error(phydev); } EXPORT_SYMBOL(phy_error); From 4e8698d074c82bb9b09e37f7d5f0cd663551697d Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 4 Jun 2024 19:16:18 +0300 Subject: [PATCH 13/21] thunderbolt: Add Kconfig option to disable PCIe tunneling In typical cases PCIe tunneling is needed to make the devices fully usable for the host system. However, it poses a security issue because they can also use DMA to access the host memory. We already have two ways of preventing this, one an IOMMU that is enabled on recent systems by default and the second is the "authorized" attribute under each connected device that needs to be written by userspace before a PCIe tunnel is created. This option adds one more by adding a Kconfig option, which is enabled by default, that can be used to make kernel binaries where PCIe tunneling is completely disabled. 
Signed-off-by: Mika Westerberg References: https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_134314v1/bat-mtlp-9/boot0.txt References: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/11261 Signed-off-by: Imre Deak Acked-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240604161618.1958674-1-imre.deak@intel.com Signed-off-by: Rodrigo Vivi --- drivers/thunderbolt/Kconfig | 18 ++++++++++++++++++ drivers/thunderbolt/tb.c | 2 +- drivers/thunderbolt/tb.h | 9 +++++++++ drivers/thunderbolt/tunnel.c | 8 ++++---- drivers/thunderbolt/usb4.c | 2 +- 5 files changed, 33 insertions(+), 6 deletions(-) diff --git a/drivers/thunderbolt/Kconfig b/drivers/thunderbolt/Kconfig index 0abdb69ee9f437..8bf4ecf7f76eef 100644 --- a/drivers/thunderbolt/Kconfig +++ b/drivers/thunderbolt/Kconfig @@ -18,6 +18,24 @@ menuconfig USB4 if USB4 +config USB4_PCIE_TUNNELING + bool "Allow PCI Express tunneling over USB4 fabric" + depends on PCI + default y + help + USB4 and Thunderbolt devices typically include PCIe switch + with a number of PCIe endpoints such as USB host controllers, + GPUs and network adapters. These are made available to the + host system through PCIe tunneling. These can use DMA and + therefore have access to the host memory which is typically + guarded by an IOMMU. This option allows disabling PCIe + tunneling completely. + + For devices to be usable it is recommended to say Y here. + + Note this only works with systems that use Software Based + Connection Manager (this is most USB4 hosts). 
+ config USB4_DEBUGFS_WRITE bool "Enable write by debugfs to configuration spaces (DANGEROUS)" help diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index 8c527af989271c..f19839b3b2c6d1 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -3348,7 +3348,7 @@ struct tb *tb_probe(struct tb_nhi *nhi) if (!tb) return NULL; - if (tb_acpi_may_tunnel_pcie()) + if (tb_may_tunnel_pcie()) tb->security_level = TB_SECURITY_USER; else tb->security_level = TB_SECURITY_NOPCIE; diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index b54147a1ba8778..d1ce8f1eccf6c1 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -1504,6 +1504,15 @@ static inline int tb_acpi_power_on_retimers(struct tb_port *port) { return 0; } static inline int tb_acpi_power_off_retimers(struct tb_port *port) { return 0; } #endif +static inline bool tb_may_tunnel_pcie(void) +{ +#ifdef CONFIG_USB4_PCIE_TUNNELING + return tb_acpi_may_tunnel_pcie(); +#else + return false; +#endif +} + #ifdef CONFIG_DEBUG_FS void tb_debugfs_init(void); void tb_debugfs_exit(void); diff --git a/drivers/thunderbolt/tunnel.c b/drivers/thunderbolt/tunnel.c index 76254ed3f47f4b..d3fc4173d9cecb 100644 --- a/drivers/thunderbolt/tunnel.c +++ b/drivers/thunderbolt/tunnel.c @@ -122,7 +122,7 @@ static unsigned int tb_available_credits(const struct tb_port *port, size_t ndp; usb3 = tb_acpi_may_tunnel_usb3() ? sw->max_usb3_credits : 0; - pcie = tb_acpi_may_tunnel_pcie() ? sw->max_pcie_credits : 0; + pcie = tb_may_tunnel_pcie() ? 
sw->max_pcie_credits : 0; if (tb_acpi_is_xdomain_allowed()) { spare = min_not_zero(sw->max_dma_credits, dma_credits); @@ -479,7 +479,7 @@ bool tb_tunnel_reserved_pci(struct tb_port *port, int *reserved_up, if (WARN_ON_ONCE(!port->remote)) return false; - if (!tb_acpi_may_tunnel_pcie()) + if (!tb_may_tunnel_pcie()) return false; if (tb_port_get_link_generation(port) < 4) @@ -1646,7 +1646,7 @@ static unsigned int tb_dma_available_credits(const struct tb_port *port) int credits; credits = tb_available_credits(port, NULL); - if (tb_acpi_may_tunnel_pcie()) + if (tb_may_tunnel_pcie()) credits -= sw->max_pcie_credits; credits -= port->dma_credits; @@ -1957,7 +1957,7 @@ static int tb_usb3_consumed_bandwidth(struct tb_tunnel *tunnel, int *consumed_up, int *consumed_down) { struct tb_port *port = tb_upstream_port(tunnel->dst_port->sw); - int pcie_weight = tb_acpi_may_tunnel_pcie() ? TB_PCI_WEIGHT : 0; + int pcie_weight = tb_may_tunnel_pcie() ? TB_PCI_WEIGHT : 0; /* * PCIe tunneling, if enabled, affects the USB3 bandwidth so diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c index e51d01671d8e7c..1b740d7fc7dab0 100644 --- a/drivers/thunderbolt/usb4.c +++ b/drivers/thunderbolt/usb4.c @@ -276,7 +276,7 @@ int usb4_switch_setup(struct tb_switch *sw) * Only enable PCIe tunneling if the parent router supports it * and it is not disabled. */ - if (tb_acpi_may_tunnel_pcie() && + if (tb_may_tunnel_pcie() && tb_switch_find_port(parent, TB_TYPE_PCIE_DOWN)) { val |= ROUTER_CS_5_PTO; /* From abfe4d1d04b5806ffcf552b2482b8b279846f152 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Mon, 3 Feb 2025 15:31:13 +0200 Subject: [PATCH 14/21] Revert "lockdep: Enable PROVE_RAW_LOCK_NESTING with PROVE_LOCKING." This reverts commit 560af5dc839eef08a273908f390cfefefb82aa04. Locking in i915_pmu.c interacting with perf is completely wrong. It's using spinlock_t everywhere when it should actually use raw_spinlock_t since perf is already holding raw_spinlock in the caller. 
This started to be checked with commit 560af5dc839e ("lockdep: Enable PROVE_RAW_LOCK_NESTING with PROVE_LOCKING."), but should only be a real issue when PREEMPT_RT is enabled: in that config, the spinlock_t can sleep and creates issues. Reworking the locks in i915_pmu.c is not very simple as changing locks to raw_spinlock_t cascades to too many locks, which is both a) not desired from an RT perspective and b) hard to get right as it calls into other parts of the driver that have other requirements. Example backtrace: <4> [141.043897] ============================= <4> [141.043922] [ BUG: Invalid wait context ] <4> [141.043940] 6.13.0-rc2-CI_DRM_15820-g78bd7a249aa0+ #1 Not tainted <4> [141.043964] ----------------------------- <4> [141.043981] swapper/0/0 is trying to lock: <4> [141.044000] ffff88810861b910 (&pmu->lock){....}-{3:3}, at: i915_pmu_enable+0x48/0x3a0 [i915] <4> [141.044194] other info that might help us debug this: <4> [141.044217] context-{5:5} <4> [141.044229] 1 lock held by swapper/0/0: <4> [141.044248] #0: ffff88885f432038 (&cpuctx_lock){....}-{2:2}, at: __perf_install_in_context+0x3f/0x360 <4> [141.044297] stack backtrace: <4> [141.044312] CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.13.0-rc2-CI_DRM_15820-g78bd7a249aa0+ #1 <4> [141.044353] Hardware name: Intel Corporation Meteor Lake Client Platform/MTL-P LP5x T3 RVP, BIOS MTLPFWI1.R00.3471.D91.2401310918 01/31/2024 <4> [141.044405] Call Trace: <4> [141.044419] <4> [141.044431] dump_stack_lvl+0x91/0xf0 <4> [141.044454] dump_stack+0x10/0x20 <4> [141.044472] __lock_acquire+0x990/0x2820 <4> [141.044498] lock_acquire+0xc9/0x300 <4> [141.044518] ? i915_pmu_enable+0x48/0x3a0 [i915] <4> [141.044689] _raw_spin_lock_irqsave+0x49/0x80 <4> [141.044713] ? i915_pmu_enable+0x48/0x3a0 [i915] <4> [141.044903] i915_pmu_enable+0x48/0x3a0 [i915] <4> [141.045112] ? 
__lock_acquire+0x455/0x2820 <4> [141.045142] i915_pmu_event_add+0x71/0x90 [i915] More time is needed to get this fixed properly, but let's not pile regressions on top. Signed-off-by: Luca Coelho Link: https://patchwork.freedesktop.org/patch/msgid/20241211121703.2890150-1-luciano.coelho@intel.com [ Reword commit message, giving more detail on what the issue is ] Signed-off-by: Lucas De Marchi References: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13311 Acked-by: Rodrigo Vivi --- lib/Kconfig.debug | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 9fe4d8dfe57829..a5e76c86b32f04 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1420,12 +1420,20 @@ config PROVE_LOCKING config PROVE_RAW_LOCK_NESTING bool "Enable raw_spinlock - spinlock nesting checks" if !ARCH_SUPPORTS_RT depends on PROVE_LOCKING - default y if ARCH_SUPPORTS_RT + default n help Enable the raw_spinlock vs. spinlock nesting checks which ensure that the lock nesting rules for PREEMPT_RT enabled kernels are not violated. + NOTE: There are known nesting problems. So if you enable this + option expect lockdep splats until these problems have been fully + addressed which is work in progress. This config switch allows to + identify and analyze these problems. It will be removed and the + check permanently enabled once the main issues have been fixed. + + If unsure, select N. + config LOCK_STAT bool "Lock usage statistics" depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT From 6c87d5b5bc046f802813f750485414e3fc3d5914 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 11 Jul 2024 21:23:20 +0200 Subject: [PATCH 15/21] drm/xe: Enable SR-IOV for ADL/ATSM We should now have sufficient changes in the Xe driver to run it on ADL and ATSM platforms in the PF mode, to configure VFs and successfully probe the driver on the enabled VF devices. 
While some more changes are likely still needed to fix all corner cases, we will not find them without running any tests. To start testing this feature by the CI, we need to mark which platforms have basic SR-IOV support and let the driver run in the PF mode. Since this feature support is still in the early testing stage, make all enabling available only for CONFIG_DRM_XE_DEBUG=y and keep it on the CI topic branch. Note that from this point, on selected platforms, the Xe driver will be acting as a PF driver, with some SR-IOV specific changes compared to running in the non-virtualized (native) mode. However, those specific changes are visible mostly on the debugfs, and should not impact normal driver execution, unless VFs will be manually provisioned or explicitly enabled. Once we finish adding the remaining SR-IOV tests to the CI and fix any issues that we find in the meantime, we will replace this patch with a proper series outside the topic branch. Suggested-by: Rodrigo Vivi Signed-off-by: Michal Wajdeczko Acked-by: Lucas De Marchi Acked-by: Thomas Hellstrom Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240711192320.1198-3-michal.wajdeczko@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_module.c | 3 +++ drivers/gpu/drm/xe/xe_pci.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index be8603b16ff3fb..15b3cf22193c47 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -22,6 +22,9 @@ struct xe_modparam xe_modparam = { .probe_display = true, .guc_log_level = 3, .force_probe = CONFIG_DRM_XE_FORCE_PROBE, +#ifdef CONFIG_PCI_IOV + .max_vfs = IS_ENABLED(CONFIG_DRM_XE_DEBUG) ? 
~0 : 0, +#endif .wedged_mode = 1, .svm_notifier_size = 512, /* the rest are 0 by default */ diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 07fe994f2a807d..c965898b3ebf57 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -224,6 +224,7 @@ static const struct xe_device_desc adl_s_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .has_sriov = IS_ENABLED(CONFIG_DRM_XE_DEBUG), .require_force_probe = true, .subplatforms = (const struct xe_subplatform_desc[]) { { XE_SUBPLATFORM_ALDERLAKE_S_RPLS, "RPLS", adls_rpls_ids }, @@ -240,6 +241,7 @@ static const struct xe_device_desc adl_p_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .has_sriov = IS_ENABLED(CONFIG_DRM_XE_DEBUG), .require_force_probe = true, .subplatforms = (const struct xe_subplatform_desc[]) { { XE_SUBPLATFORM_ALDERLAKE_P_RPLU, "RPLU", adlp_rplu_ids }, @@ -254,6 +256,7 @@ static const struct xe_device_desc adl_n_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .has_sriov = IS_ENABLED(CONFIG_DRM_XE_DEBUG), .require_force_probe = true, }; @@ -294,6 +297,7 @@ static const struct xe_device_desc ats_m_desc = { DG2_FEATURES, .has_display = false, + .has_sriov = IS_ENABLED(CONFIG_DRM_XE_DEBUG), }; static const struct xe_device_desc dg2_desc = { From e3332d1bcbe86d145a19301cff0dc51d3270991c Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 8 Dec 2023 12:11:06 -0500 Subject: [PATCH 16/21] drm/xe: Add PVC's PCI device IDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch introduces Ponte Vecchio support in Xe driver. Please note that besides this patch, likely the force_probe still needs to be used in order to actually enable the support for PVC. This patch was separated from the rest so we can ensure compliance with DRM uAPI rules on compute platforms. 
Cc: Lucas De Marchi Cc: Thomas Hellström Cc: Oded Gabbay Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1154 Signed-off-by: Rodrigo Vivi Reviewed-by: Lucas De Marchi Signed-off-by: Thomas Hellström Signed-off-by: Lucas De Marchi Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index c965898b3ebf57..4fe7e0d941a911 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -380,6 +380,7 @@ static const struct pci_device_id pciidlist[] = { INTEL_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc), INTEL_ARL_IDS(INTEL_VGA_DEVICE, &mtl_desc), INTEL_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc), + INTEL_PVC_IDS(INTEL_VGA_DEVICE, &pvc_desc), INTEL_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc), INTEL_LNL_IDS(INTEL_VGA_DEVICE, &lnl_desc), INTEL_BMG_IDS(INTEL_VGA_DEVICE, &bmg_desc), From 20f634360f3f26b4fb97e64622c681b473d41443 Mon Sep 17 00:00:00 2001 From: Julia Filipchuk Date: Thu, 3 Apr 2025 11:56:16 -0700 Subject: [PATCH 17/21] drm/xe/pvc: Add GuC firmware definition Add pre-release support for PVC. UAPI version 1.13.4. 
Signed-off-by: Julia Filipchuk Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20250403185619.1555853-7-John.C.Harrison@Intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_uc_fw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 2741849bbf4df3..fe1813deff004b 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -117,6 +117,7 @@ struct fw_blobs_by_type { fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 44, 1)) \ fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 44, 1)) \ fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 44, 1)) \ + fw_def(PVC, GT_TYPE_ANY, mmp_ver(xe, guc, pvc, 70, 44, 1)) \ fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 44, 1)) \ fw_def(DG1, GT_TYPE_ANY, major_ver(i915, guc, dg1, 70, 44, 1)) \ fw_def(ALDERLAKE_N, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \ From 727af07109f42257bedc30c19c98fec3805c1c89 Mon Sep 17 00:00:00 2001 From: Clint Taylor Date: Thu, 3 Apr 2025 11:56:17 -0700 Subject: [PATCH 18/21] drm/xe/ptl: Add GuC firmware definition Define the GuC firmware to load on the platform. 
Signed-off-by: Clint Taylor Signed-off-by: Matt Atwood Reviewed-by: John Harrison Acked-by: Lucas De Marchi Signed-off-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20250403185619.1555853-8-John.C.Harrison@Intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_uc_fw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index fe1813deff004b..1db598d3b1de3f 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -114,6 +114,7 @@ struct fw_blobs_by_type { #define XE_GT_TYPE_ANY XE_GT_TYPE_UNINITIALIZED #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ + fw_def(PANTHERLAKE, GT_TYPE_ANY, mmp_ver(xe, guc, ptl, 70, 44, 1)) \ fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 44, 1)) \ fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 44, 1)) \ fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 44, 1)) \ From b7aa26cfe2bf552630ece2cce510350d1360b575 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 7 Mar 2025 19:56:36 -0500 Subject: [PATCH 19/21] drm/xe/pm: Re-enable D3Cold by default on BMG This patch re-enables D3Cold by default on BMG. If issues on runtime_pm resume are seen and the D3cold->D0 transition is suspected to block the device or cause memory corruptions, D3cold can be disabled for confirmation with either: 1. at runtime: echo 0 > /sys/bus/pci/devices//vram_d3cold_threshold 2. at boot: pcie_port_pm=off Upon confirmation of D3Cold related bug, please file a bug to the link below. 
Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/ Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20250308005636.1475420-2-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pm.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 4e112fbacada45..aaba2a97bb3aa3 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -279,10 +279,6 @@ ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */ static u32 vram_threshold_value(struct xe_device *xe) { - /* FIXME: D3Cold temporarily disabled by default on BMG */ - if (xe->info.platform == XE_BATTLEMAGE) - return 0; - return DEFAULT_VRAM_THRESHOLD; } From 21c916e4f3d332d70fea7d5659d341de32c6ca9e Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 11 Apr 2025 14:44:53 +0300 Subject: [PATCH 20/21] drm-tip: 2025y-04m-11d-11h-44m-23s UTC integration manifest --- integration-manifest | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 integration-manifest diff --git a/integration-manifest b/integration-manifest new file mode 100644 index 00000000000000..dc489857ad36ba --- /dev/null +++ b/integration-manifest @@ -0,0 +1,28 @@ +drm drm-fixes 485442c6a523de1d293350e039a9d9df9c08704c + Merge tag 'drm-xe-fixes-2025-04-10' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes +drm-misc drm-misc-fixes 31660b406d872b5ccb3c2ec6f932969809c35b18 + accel/ivpu: Add cmdq_id to job related logs +drm-intel drm-intel-fixes e3ea2eae70692a455e256787e4f54153fb739b90 + drm/i915/huc: Fix fence not released on early probe errors +drm-xe drm-xe-fixes 88ecb66b9956a14577d513a6c8c28bb2e7989703 + drm/xe: Restore EIO errno return when GuC PC start fails +drm drm-next 0af2f6be1b4281385b618cb86ad946eded089ac8 + Linux 6.15-rc1 +drm-misc drm-misc-next-fixes 85a063b8b281e144ed96463936fb4e6b3d4fe9e4 + drm/i2c: tda998x: select CONFIG_DRM_KMS_HELPER 
+drm-intel drm-intel-next-fixes 0af2f6be1b4281385b618cb86ad946eded089ac8 + Linux 6.15-rc1 +drm-xe drm-xe-next-fixes 5e66cf6edddb5f6237e3afb07475ace57ecb56bc + drm/xe: Fix unmet direct dependencies warning +drm-misc drm-misc-next 4c962bc929f1734d209a0862359e25fef8f56fa0 + drm/hisilicon/hibmc: Add vga connector detect functions +drm-intel drm-intel-next 1954629dc649b25071eec0d353288c5ee303e358 + drm/i915/debugfs: move PCH type to display caps +drm-intel drm-intel-gt-next 795dbde92fe5c6996a02a5b579481de73035e7bf + drm/i915/huc: Fix fence not released on early probe errors +drm-xe drm-xe-next d11c5a928a6e1d786e25a9284ef59bf58a02cf0d + drm/xe/vf: Don't expose privileged GT debugfs files if VF +drm-intel topic/core-for-CI abfe4d1d04b5806ffcf552b2482b8b279846f152 + Revert "lockdep: Enable PROVE_RAW_LOCK_NESTING with PROVE_LOCKING." +drm-xe topic/xe-for-CI b7aa26cfe2bf552630ece2cce510350d1360b575 + drm/xe/pm: Re-enable D3Cold by default on BMG From dcb5e6d1ddcf5a8a2e771095a253dba29c72162c Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Tue, 29 Apr 2025 20:00:55 +0530 Subject: [PATCH 21/21] drm/i915/vrr: Program EMP_AS_SDP_TL for DP AS SDP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The register EMP_AS_SDP_TL (MTL) was introduced for configuring the double buffering point and transmission line for HDMI Video Timing Extended Metadata Packet (VTEMP) for VRR. This was also intended to be configured for DP to HDMI2.1 PCON to support VRR. >From BMG and LNL+ onwards, this register was extended to Display Port Adaptive Sync SDP to have a common register to configure double buffering point and transmission line for both HDMI and DP VRR related packets. Currently, we do not support VRR for either native HDMI or via PCON. However we need to configure this for DP SDP case. As per the spec, program the register to set Vsync start as the double buffering point for DP AS SDP. 
Bspec:70984, 71197 Signed-off-by: Ankit Nautiyal Tested-by: Jouni Högander --- drivers/gpu/drm/i915/display/intel_vrr.c | 20 +++++++++++++++++++ drivers/gpu/drm/i915/display/intel_vrr_regs.h | 6 ++++++ 2 files changed, 26 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 633a66f6b73be3..39706a458d3b4d 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -573,6 +573,22 @@ bool intel_vrr_always_use_vrr_tg(struct intel_display *display) return false; } +static +void intel_vrr_set_emp_as_sdp_tl(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; + + /* + * For BMG and LNL+ onwards the EMP_AS_SDP_TL is used for programming + * double buffering point and transmission line for Adaptive Sync SDP. + */ + if (DISPLAY_VERx100(display) == 1401 || DISPLAY_VER(display) >= 20) + intel_de_write(display, + EMP_AS_SDP_TL(display, cpu_transcoder), + EMP_AS_SDP_DB_TL(crtc_state->vrr.vsync_start)); +} + void intel_vrr_enable(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); @@ -592,6 +608,8 @@ void intel_vrr_enable(const struct intel_crtc_state *crtc_state) TRANS_PUSH_EN); if (!intel_vrr_always_use_vrr_tg(display)) { + intel_vrr_set_emp_as_sdp_tl(crtc_state); + if (crtc_state->cmrr.enable) { intel_de_write(display, TRANS_VRR_CTL(display, cpu_transcoder), VRR_CTL_VRR_ENABLE | VRR_CTL_CMRR_ENABLE | @@ -643,6 +661,8 @@ void intel_vrr_transcoder_enable(const struct intel_crtc_state *crtc_state) intel_de_write(display, TRANS_PUSH(display, cpu_transcoder), TRANS_PUSH_EN); + intel_vrr_set_emp_as_sdp_tl(crtc_state); + intel_de_write(display, TRANS_VRR_CTL(display, cpu_transcoder), VRR_CTL_VRR_ENABLE | trans_vrr_ctl(crtc_state)); } diff --git a/drivers/gpu/drm/i915/display/intel_vrr_regs.h 
b/drivers/gpu/drm/i915/display/intel_vrr_regs.h index 6ed0e0dc97e76d..d2af1b6710bf15 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr_regs.h +++ b/drivers/gpu/drm/i915/display/intel_vrr_regs.h @@ -108,6 +108,12 @@ #define VRR_VSYNC_START_MASK REG_GENMASK(12, 0) #define VRR_VSYNC_START(vsync_start) REG_FIELD_PREP(VRR_VSYNC_START_MASK, (vsync_start)) +/* Common register for HDMI VTEMP and DP AS SDP */ +#define _EMP_AS_SDP_TL_A 0x60204 +#define EMP_AS_SDP_DB_TL_MASK REG_GENMASK(12, 0) +#define EMP_AS_SDP_TL(dev_priv, trans) _MMIO_TRANS2(dev_priv, trans, _EMP_AS_SDP_TL_A) +#define EMP_AS_SDP_DB_TL(db_transmit_line) REG_FIELD_PREP(EMP_AS_SDP_DB_TL_MASK, (db_transmit_line)) + /*CMRR Registers*/ #define _TRANS_CMRR_M_LO_A 0x604F0