Discussion:
[PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists.
(too old to reply)
Tomasz Lis
2018-03-27 15:17:59 UTC
Permalink
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.

Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

Bspec: 18922
Signed-off-by: Tomasz Lis <***@intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 2 ++
drivers/gpu/drm/i915/i915_pci.c | 3 ++-
drivers/gpu/drm/i915/intel_device_info.h | 1 +
drivers/gpu/drm/i915/intel_lrc.c | 45 +++++++++++++++++++++++++++-----
drivers/gpu/drm/i915/intel_lrc.h | 1 +
5 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 800230b..c32580b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private *dev_priv)
((dev_priv)->info.has_logical_ring_elsq)
#define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+ ((dev_priv)->info.has_hw_preempt_to_idle)

#define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)

diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 4364922..66b6700 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
GEN(11), \
.ddb_size = 2048, \
.has_csr = 0, \
- .has_logical_ring_elsq = 1
+ .has_logical_ring_elsq = 1, \
+ .has_hw_preempt_to_idle = 1

static const struct intel_device_info intel_icelake_11_info = {
GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
func(has_logical_ring_contexts); \
func(has_logical_ring_elsq); \
func(has_logical_ring_preemption); \
+ func(has_hw_preempt_to_idle); \
func(has_overlay); \
func(has_pooled_eu); \
func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ba7f783..1a22de4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -153,6 +153,7 @@
#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
#define GEN8_CTX_STATUS_COMPLETE (1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE (1 << 29)

#define GEN8_CTX_STATUS_COMPLETED_MASK \
(GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -183,7 +184,9 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
const struct i915_request *last,
int prio)
{
- return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
+ return (engine->i915->preempt_context ||
+ HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
+ prio > max(rq_prio(last), 0);
}

/**
@@ -535,6 +538,25 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
}

+static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists *execlists = &engine->execlists;
+
+ GEM_TRACE("%s\n", engine->name);
+
+	/*
+	 * Hardware which has HAS_HW_PREEMPT_TO_IDLE() always also has
+	 * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set.
+	 */
+	GEM_BUG_ON(!execlists->ctrl_reg);
+
+ /* trigger preemption to idle */
+ writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+
+ execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
+ execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
+}
+
static void execlists_dequeue(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -594,7 +616,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
goto unlock;

if (need_preempt(engine, last, execlists->queue_priority)) {
- inject_preempt_context(engine);
+ if (HAS_HW_PREEMPT_TO_IDLE(engine->i915))
+ gen11_preempt_to_idle(engine);
+ else
+ inject_preempt_context(engine);
goto unlock;
}

@@ -962,10 +987,13 @@ static void execlists_submission_tasklet(unsigned long data)
status, buf[2*head + 1],
execlists->active);

- if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
- GEN8_CTX_STATUS_PREEMPTED))
+ /* Check if switched to active or preempted to active */
+ if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+ GEN8_CTX_STATUS_PREEMPTED)) &&
+ !(status & GEN11_CTX_STATUS_PREEMPT_IDLE))
execlists_set_active(execlists,
EXECLISTS_ACTIVE_HWACK);
+
if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
execlists_clear_active(execlists,
EXECLISTS_ACTIVE_HWACK);
@@ -976,8 +1004,13 @@ static void execlists_submission_tasklet(unsigned long data)
/* We should never get a COMPLETED | IDLE_ACTIVE! */
GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);

- if (status & GEN8_CTX_STATUS_COMPLETE &&
- buf[2*head + 1] == execlists->preempt_complete_status) {
+ /*
+ * Check if preempted to real idle, either directly or
+ * the preemptive context already finished executing
+ */
+ if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+ (status & GEN8_CTX_STATUS_COMPLETE &&
+ buf[2*head + 1] == execlists->preempt_complete_status)) {
GEM_TRACE("%s preempt-idle\n", engine->name);

execlists_cancel_port_requests(execlists);
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 59d7b86..958d1b3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -45,6 +45,7 @@
#define RING_EXECLIST_SQ_CONTENTS(engine) _MMIO((engine)->mmio_base + 0x510)
#define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base + 0x550)
#define EL_CTRL_LOAD (1 << 0)
+#define EL_CTRL_PREEMPT_TO_IDLE (1 << 1)

/* The docs specify that the write pointer wraps around after 5h, "After status
* is written out to the last available status QW at offset 5h, this pointer
--
2.7.4
Patchwork
2018-03-27 15:40:32 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists.
URL : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
96268839cd00 drm/i915/gen11: Preempt-to-idle support in execlists.
-:97: CHECK:COMPARISON_TO_NULL: Comparison to NULL could be written "execlists->ctrl_reg"
#97: FILE: drivers/gpu/drm/i915/intel_lrc.c:551:
+ GEM_BUG_ON(execlists->ctrl_reg != NULL);

-:149: CHECK:SPACING: spaces preferred around that '*' (ctx:VxV)
#149: FILE: drivers/gpu/drm/i915/intel_lrc.c:1013:
+ buf[2*head + 1] == execlists->preempt_complete_status)) {
^

total: 0 errors, 0 warnings, 2 checks, 114 lines checked
Patchwork
2018-03-27 15:56:06 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists.
URL : https://patchwork.freedesktop.org/series/40747/
State : success

== Summary ==

Series 40747v1 drm/i915/gen11: Preempt-to-idle support in execlists.
https://patchwork.freedesktop.org/api/1.0/series/40747/revisions/1/mbox/

---- Known issues:

Test gem_mmap_gtt:
Subgroup basic-small-bo-tiledx:
pass -> FAIL (fi-gdg-551) fdo#102575
Test kms_pipe_crc_basic:
Subgroup suspend-read-crc-pipe-b:
dmesg-warn -> PASS (fi-cnl-y3) fdo#104951
Test prime_vgem:
Subgroup basic-fence-flip:
pass -> FAIL (fi-ilk-650) fdo#104008

fdo#102575 https://bugs.freedesktop.org/show_bug.cgi?id=102575
fdo#104951 https://bugs.freedesktop.org/show_bug.cgi?id=104951
fdo#104008 https://bugs.freedesktop.org/show_bug.cgi?id=104008

fi-bdw-5557u total:285 pass:264 dwarn:0 dfail:0 fail:0 skip:21 time:434s
fi-bdw-gvtdvm total:285 pass:261 dwarn:0 dfail:0 fail:0 skip:24 time:451s
fi-blb-e6850 total:285 pass:220 dwarn:1 dfail:0 fail:0 skip:64 time:383s
fi-bsw-n3050 total:285 pass:239 dwarn:0 dfail:0 fail:0 skip:46 time:547s
fi-bwr-2160 total:285 pass:180 dwarn:0 dfail:0 fail:0 skip:105 time:302s
fi-bxt-dsi total:285 pass:255 dwarn:0 dfail:0 fail:0 skip:30 time:514s
fi-bxt-j4205 total:285 pass:256 dwarn:0 dfail:0 fail:0 skip:29 time:515s
fi-byt-j1900 total:285 pass:250 dwarn:0 dfail:0 fail:0 skip:35 time:529s
fi-byt-n2820 total:285 pass:246 dwarn:0 dfail:0 fail:0 skip:39 time:514s
fi-cfl-8700k total:285 pass:257 dwarn:0 dfail:0 fail:0 skip:28 time:411s
fi-cfl-s3 total:285 pass:259 dwarn:0 dfail:0 fail:0 skip:26 time:573s
fi-cfl-u total:285 pass:259 dwarn:0 dfail:0 fail:0 skip:26 time:515s
fi-cnl-y3 total:285 pass:259 dwarn:0 dfail:0 fail:0 skip:26 time:594s
fi-elk-e7500 total:285 pass:225 dwarn:1 dfail:0 fail:0 skip:59 time:430s
fi-gdg-551 total:285 pass:176 dwarn:0 dfail:0 fail:1 skip:108 time:326s
fi-glk-1 total:285 pass:257 dwarn:0 dfail:0 fail:0 skip:28 time:537s
fi-hsw-4770 total:285 pass:258 dwarn:0 dfail:0 fail:0 skip:27 time:404s
fi-ilk-650 total:285 pass:224 dwarn:0 dfail:0 fail:1 skip:60 time:421s
fi-ivb-3520m total:285 pass:256 dwarn:0 dfail:0 fail:0 skip:29 time:476s
fi-ivb-3770 total:285 pass:252 dwarn:0 dfail:0 fail:0 skip:33 time:434s
fi-kbl-7500u total:285 pass:260 dwarn:1 dfail:0 fail:0 skip:24 time:479s
fi-kbl-7567u total:285 pass:265 dwarn:0 dfail:0 fail:0 skip:20 time:471s
fi-kbl-r total:285 pass:258 dwarn:0 dfail:0 fail:0 skip:27 time:521s
fi-pnv-d510 total:285 pass:219 dwarn:1 dfail:0 fail:0 skip:65 time:660s
fi-skl-6260u total:285 pass:265 dwarn:0 dfail:0 fail:0 skip:20 time:456s
fi-skl-6600u total:285 pass:258 dwarn:0 dfail:0 fail:0 skip:27 time:531s
fi-skl-6700k2 total:285 pass:261 dwarn:0 dfail:0 fail:0 skip:24 time:508s
fi-skl-6770hq total:285 pass:265 dwarn:0 dfail:0 fail:0 skip:20 time:491s
fi-skl-guc total:285 pass:257 dwarn:0 dfail:0 fail:0 skip:28 time:430s
fi-skl-gvtdvm total:285 pass:262 dwarn:0 dfail:0 fail:0 skip:23 time:449s
fi-snb-2520m total:285 pass:245 dwarn:0 dfail:0 fail:0 skip:40 time:584s
fi-snb-2600 total:285 pass:245 dwarn:0 dfail:0 fail:0 skip:40 time:403s
Blacklisted hosts:
fi-cnl-psr total:285 pass:255 dwarn:3 dfail:0 fail:1 skip:26 time:534s
fi-glk-j4005 total:285 pass:256 dwarn:0 dfail:0 fail:0 skip:29 time:495s

ff7820832182a0f4bebf9092a74ab17f8b3ae7ef drm-tip: 2018y-03m-27d-14h-31m-00s UTC integration manifest
96268839cd00 drm/i915/gen11: Preempt-to-idle support in execlists.

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8505/issues.html
Patchwork
2018-03-27 20:50:48 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists.
URL : https://patchwork.freedesktop.org/series/40747/
State : success

== Summary ==

---- Known issues:

Test kms_cursor_legacy:
Subgroup flip-vs-cursor-atomic:
fail -> PASS (shard-hsw) fdo#102670
Test kms_flip:
Subgroup 2x-dpms-vs-vblank-race-interruptible:
pass -> FAIL (shard-hsw) fdo#103060
Subgroup 2x-flip-vs-wf_vblank:
fail -> PASS (shard-hsw) fdo#100368 +1
Test kms_sysfs_edid_timing:
pass -> WARN (shard-apl) fdo#100047

fdo#102670 https://bugs.freedesktop.org/show_bug.cgi?id=102670
fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#100047 https://bugs.freedesktop.org/show_bug.cgi?id=100047

shard-apl total:3495 pass:1831 dwarn:1 dfail:0 fail:7 skip:1655 time:12913s
shard-hsw total:3495 pass:1781 dwarn:1 dfail:0 fail:3 skip:1709 time:11676s
shard-snb total:3495 pass:1374 dwarn:1 dfail:0 fail:3 skip:2117 time:7015s
Blacklisted hosts:
shard-kbl total:3493 pass:1950 dwarn:5 dfail:0 fail:9 skip:1528 time:9549s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8505/shards.html
Chris Wilson
2018-03-27 23:27:10 UTC
Permalink
Quoting Tomasz Lis (2018-03-27 16:17:59)
Post by Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.
Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.
This patch does not cover using the new preemption mechanism when GuC is
active.
Bspec: 18922
---
drivers/gpu/drm/i915/i915_drv.h | 2 ++
drivers/gpu/drm/i915/i915_pci.c | 3 ++-
drivers/gpu/drm/i915/intel_device_info.h | 1 +
drivers/gpu/drm/i915/intel_lrc.c | 45 +++++++++++++++++++++++++++-----
drivers/gpu/drm/i915/intel_lrc.h | 1 +
5 files changed, 45 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 800230b..c32580b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private *dev_priv)
((dev_priv)->info.has_logical_ring_elsq)
#define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+ ((dev_priv)->info.has_hw_preempt_to_idle)
#define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 4364922..66b6700 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
GEN(11), \
.ddb_size = 2048, \
.has_csr = 0, \
- .has_logical_ring_elsq = 1
+ .has_logical_ring_elsq = 1, \
+ .has_hw_preempt_to_idle = 1
static const struct intel_device_info intel_icelake_11_info = {
GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
func(has_logical_ring_contexts); \
func(has_logical_ring_elsq); \
func(has_logical_ring_preemption); \
+ func(has_hw_preempt_to_idle); \
func(has_overlay); \
func(has_pooled_eu); \
func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ba7f783..1a22de4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -153,6 +153,7 @@
#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
#define GEN8_CTX_STATUS_COMPLETE (1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE (1 << 29)
#define GEN8_CTX_STATUS_COMPLETED_MASK \
(GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -183,7 +184,9 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
const struct i915_request *last,
int prio)
{
- return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
+ return (engine->i915->preempt_context ||
+ HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
Well, you haven't actually disabled allocating the preempt_context so...

But at any rate, making this an engine->flag would eliminate one pointer
dance.
Post by Tomasz Lis
+ prio > max(rq_prio(last), 0);
}
/**
@@ -535,6 +538,25 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
}
+static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists *execlists = &engine->execlists;
+
+ GEM_TRACE("%s\n", engine->name);
+
+ /*
+ * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+ * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+ */
+ GEM_BUG_ON(execlists->ctrl_reg != NULL);
+
+ /* trigger preemption to idle */
+ writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
Future plans? Because just inserting the branch into the setter of
inject_preempt_context() resolves a lot of conflicts with other work.
Post by Tomasz Lis
@@ -962,10 +987,13 @@ static void execlists_submission_tasklet(unsigned long data)
status, buf[2*head + 1],
execlists->active);
- if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
- GEN8_CTX_STATUS_PREEMPTED))
+ /* Check if switched to active or preempted to active */
+ if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+ GEN8_CTX_STATUS_PREEMPTED)) &&
+ !(status & GEN11_CTX_STATUS_PREEMPT_IDLE))
Setting HWACK here is harmless as it gets cleared again. Unless, there
is some oddity in the code flow.
Post by Tomasz Lis
execlists_set_active(execlists,
EXECLISTS_ACTIVE_HWACK);
+
if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
execlists_clear_active(execlists,
EXECLISTS_ACTIVE_HWACK);
@@ -976,8 +1004,13 @@ static void execlists_submission_tasklet(unsigned long data)
/* We should never get a COMPLETED | IDLE_ACTIVE! */
GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
- if (status & GEN8_CTX_STATUS_COMPLETE &&
- buf[2*head + 1] == execlists->preempt_complete_status) {
+ /*
+ * Check if preempted to real idle, either directly or
+ * the preemptive context already finished executing
+ */
+ if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+ (status & GEN8_CTX_STATUS_COMPLETE &&
+ buf[2*head + 1] == execlists->preempt_complete_status)) {
GEM_TRACE("%s preempt-idle\n", engine->name);
Hmm. I was hoping that we would be able to engineer a single check to
cover all sins. Might have been overly optimistic, but I can dream.
-Chris
Lis, Tomasz
2018-03-28 16:06:58 UTC
Permalink
Post by Chris Wilson
Quoting Tomasz Lis (2018-03-27 16:17:59)
Post by Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.
Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.
This patch does not cover using the new preemption mechanism when GuC is
active.
Bspec: 18922
---
drivers/gpu/drm/i915/i915_drv.h | 2 ++
drivers/gpu/drm/i915/i915_pci.c | 3 ++-
drivers/gpu/drm/i915/intel_device_info.h | 1 +
drivers/gpu/drm/i915/intel_lrc.c | 45 +++++++++++++++++++++++++++-----
drivers/gpu/drm/i915/intel_lrc.h | 1 +
5 files changed, 45 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 800230b..c32580b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private *dev_priv)
((dev_priv)->info.has_logical_ring_elsq)
#define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+ ((dev_priv)->info.has_hw_preempt_to_idle)
#define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 4364922..66b6700 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
GEN(11), \
.ddb_size = 2048, \
.has_csr = 0, \
- .has_logical_ring_elsq = 1
+ .has_logical_ring_elsq = 1, \
+ .has_hw_preempt_to_idle = 1
static const struct intel_device_info intel_icelake_11_info = {
GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
func(has_logical_ring_contexts); \
func(has_logical_ring_elsq); \
func(has_logical_ring_preemption); \
+ func(has_hw_preempt_to_idle); \
func(has_overlay); \
func(has_pooled_eu); \
func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ba7f783..1a22de4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -153,6 +153,7 @@
#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
#define GEN8_CTX_STATUS_COMPLETE (1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE (1 << 29)
#define GEN8_CTX_STATUS_COMPLETED_MASK \
(GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -183,7 +184,9 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
const struct i915_request *last,
int prio)
{
- return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
+ return (engine->i915->preempt_context ||
+ HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
Well, you haven't actually disabled allocating the preempt_context so...
Yes.. I had mixed feelings about changing needs_preempt_context() now,
as that would mean adding a temporary condition on GuC until the GuC
preemption is merged.
I will add the conditions and disable the allocation in v2 of the patch.
Post by Chris Wilson
But at any rate, making this an engine->flag would eliminate one pointer
dance.
Could be an interesting idea for a separate patch.
Post by Chris Wilson
Post by Tomasz Lis
+ prio > max(rq_prio(last), 0);
}
/**
@@ -535,6 +538,25 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
}
+static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists *execlists = &engine->execlists;
+
+ GEM_TRACE("%s\n", engine->name);
+
+ /*
+ * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+ * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+ */
+ GEM_BUG_ON(execlists->ctrl_reg != NULL);
+
+ /* trigger preemption to idle */
+ writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
Future plans? Because just inserting the branch into the setter of
inject_preempt_context() resolves a lot of conflicts with other work.
My arguments for separate function are:
- better code readability
- keeping the symmetry between execlist and GuC flow - GuC preemption
patches will introduce separate function as well
- only 4 lines of the function would be common
- the name inject_preempt_context() wouldn't match the new purpose, so
renaming would be needed
- reduced self-documenting code due to two separate preempt methods not
having distinct names

That's all, I don't have any future plans for it. If you want me to
merge the two, let me know.
Post by Chris Wilson
Post by Tomasz Lis
@@ -962,10 +987,13 @@ static void execlists_submission_tasklet(unsigned long data)
status, buf[2*head + 1],
execlists->active);
- if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
- GEN8_CTX_STATUS_PREEMPTED))
+ /* Check if switched to active or preempted to active */
+ if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+ GEN8_CTX_STATUS_PREEMPTED)) &&
+ !(status & GEN11_CTX_STATUS_PREEMPT_IDLE))
Setting HWACK here is harmless as it gets cleared again. Unless, there
is some oddity in the code flow.
I will check if lack of the change affects test results.
Personally, I would keep this change, even if only for allowing simple
definition of what HWACK flag means.
Post by Chris Wilson
Post by Tomasz Lis
execlists_set_active(execlists,
EXECLISTS_ACTIVE_HWACK);
+
if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
execlists_clear_active(execlists,
EXECLISTS_ACTIVE_HWACK);
@@ -976,8 +1004,13 @@ static void execlists_submission_tasklet(unsigned long data)
/* We should never get a COMPLETED | IDLE_ACTIVE! */
GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
- if (status & GEN8_CTX_STATUS_COMPLETE &&
- buf[2*head + 1] == execlists->preempt_complete_status) {
+ /*
+ * Check if preempted to real idle, either directly or
+ * the preemptive context already finished executing
+ */
+ if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+ (status & GEN8_CTX_STATUS_COMPLETE &&
+ buf[2*head + 1] == execlists->preempt_complete_status)) {
GEM_TRACE("%s preempt-idle\n", engine->name);
Hmm. I was hoping that we would be able to engineer a single check to
cover all sins. Might have been overly optimistic, but I can dream.
-Chris
I don't see any way to do that, besides creating separate function for
gen11.
Chris Wilson
2018-03-28 22:28:11 UTC
Permalink
Quoting Lis, Tomasz (2018-03-28 17:06:58)
Post by Lis, Tomasz
Post by Chris Wilson
Quoting Tomasz Lis (2018-03-27 16:17:59)
Post by Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.
Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.
This patch does not cover using the new preemption mechanism when GuC is
active.
Bspec: 18922
---
drivers/gpu/drm/i915/i915_drv.h | 2 ++
drivers/gpu/drm/i915/i915_pci.c | 3 ++-
drivers/gpu/drm/i915/intel_device_info.h | 1 +
drivers/gpu/drm/i915/intel_lrc.c | 45 +++++++++++++++++++++++++++-----
drivers/gpu/drm/i915/intel_lrc.h | 1 +
5 files changed, 45 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 800230b..c32580b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private *dev_priv)
((dev_priv)->info.has_logical_ring_elsq)
#define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+ ((dev_priv)->info.has_hw_preempt_to_idle)
#define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 4364922..66b6700 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
GEN(11), \
.ddb_size = 2048, \
.has_csr = 0, \
- .has_logical_ring_elsq = 1
+ .has_logical_ring_elsq = 1, \
+ .has_hw_preempt_to_idle = 1
static const struct intel_device_info intel_icelake_11_info = {
GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
func(has_logical_ring_contexts); \
func(has_logical_ring_elsq); \
func(has_logical_ring_preemption); \
+ func(has_hw_preempt_to_idle); \
func(has_overlay); \
func(has_pooled_eu); \
func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ba7f783..1a22de4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -153,6 +153,7 @@
#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
#define GEN8_CTX_STATUS_COMPLETE (1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE (1 << 29)
#define GEN8_CTX_STATUS_COMPLETED_MASK \
(GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -183,7 +184,9 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
const struct i915_request *last,
int prio)
{
- return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
+ return (engine->i915->preempt_context ||
+ HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
Well, you haven't actually disabled allocating the preempt_context so...
Yes.. I had mixed feelings about changing needs_preempt_context() now,
as that would mean adding a temporary condition on GuC until the GuC
preemption is merged.
I will add the conditions and disable the allocation in v2 of the patch.
Post by Chris Wilson
But at any rate, making this an engine->flag would eliminate one pointer
dance.
Could be an interesting idea for a separate patch.
To land first ;)
Post by Lis, Tomasz
Post by Chris Wilson
Post by Tomasz Lis
+ prio > max(rq_prio(last), 0);
}
/**
@@ -535,6 +538,25 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
}
+static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists *execlists = &engine->execlists;
+
+ GEM_TRACE("%s\n", engine->name);
+
+ /*
+ * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+ * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+ */
+ GEM_BUG_ON(execlists->ctrl_reg != NULL);
+
+ /* trigger preemption to idle */
+ writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
Future plans? Because just inserting the branch into the setter of
inject_preempt_context() resolves a lot of conflicts with other work.
- better code readability
- keeping the symmetry between execlist and GuC flow - GuC preemption
patches will introduce separate function as well
- only 4 lines of the function would be common
- the name inject_preempt_context() wouldn't match the new purpose, so
renaming would be needed
- reduced self-documenting code due to two separate preempt methods not
having distinct names
That's all, I don't have any future plans for it. If you want me to
merge the two, let me know.
The problem that I am worrying about is that we will duplicate a bunch of
other code, the actual ELS[PQ] write is the smaller portion. Plus we
already have the branch on something much more pleasant.
Post by Lis, Tomasz
Post by Chris Wilson
Post by Tomasz Lis
@@ -962,10 +987,13 @@ static void execlists_submission_tasklet(unsigned long data)
status, buf[2*head + 1],
execlists->active);
- if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
- GEN8_CTX_STATUS_PREEMPTED))
+ /* Check if switched to active or preempted to active */
+ if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+ GEN8_CTX_STATUS_PREEMPTED)) &&
+ !(status & GEN11_CTX_STATUS_PREEMPT_IDLE))
Setting HWACK here is harmless as it gets cleared again. Unless, there
is some oddity in the code flow.
I will check if lack of the change affects test results.
Personally, I would keep this change, even if only for allowing simple
definition of what HWACK flag means.
The simple definition is the opposite one, imo. We set the flag after we
get the corresponding response from HW; any preemption or activate event
must follow the most recent ELSP write. So that will include the
preemption event following the preempt-idle write.

Then on deciding that the HW is idle, we apply the complication such
that execlists->active == 0. (That rule is what breaks the pattern.)
-Chris
Lis, Tomasz
2018-03-30 15:42:02 UTC
Permalink
Post by Chris Wilson
Quoting Lis, Tomasz (2018-03-28 17:06:58)
Post by Lis, Tomasz
Post by Chris Wilson
Quoting Tomasz Lis (2018-03-27 16:17:59)
Post by Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.
Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.
This patch does not cover using the new preemption mechanism when GuC is
active.
Bspec: 18922
---
drivers/gpu/drm/i915/i915_drv.h | 2 ++
drivers/gpu/drm/i915/i915_pci.c | 3 ++-
drivers/gpu/drm/i915/intel_device_info.h | 1 +
drivers/gpu/drm/i915/intel_lrc.c | 45 +++++++++++++++++++++++++++-----
drivers/gpu/drm/i915/intel_lrc.h | 1 +
5 files changed, 45 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 800230b..c32580b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private *dev_priv)
((dev_priv)->info.has_logical_ring_elsq)
#define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+ ((dev_priv)->info.has_hw_preempt_to_idle)
#define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 4364922..66b6700 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
GEN(11), \
.ddb_size = 2048, \
.has_csr = 0, \
- .has_logical_ring_elsq = 1
+ .has_logical_ring_elsq = 1, \
+ .has_hw_preempt_to_idle = 1
static const struct intel_device_info intel_icelake_11_info = {
GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
func(has_logical_ring_contexts); \
func(has_logical_ring_elsq); \
func(has_logical_ring_preemption); \
+ func(has_hw_preempt_to_idle); \
func(has_overlay); \
func(has_pooled_eu); \
func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ba7f783..1a22de4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -153,6 +153,7 @@
#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
#define GEN8_CTX_STATUS_COMPLETE (1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE (1 << 29)
#define GEN8_CTX_STATUS_COMPLETED_MASK \
(GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -183,7 +184,9 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
const struct i915_request *last,
int prio)
{
- return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
+ return (engine->i915->preempt_context ||
+ HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
Well, you haven't actually disabled allocating the preempt_context so...
Yes.. I had mixed feelings about changing needs_preempt_context() now,
as that would mean adding a temporary condition on GuC until the GuC
preemption is merged.
I will add the conditions and disable the allocation in v2 of the patch.
Post by Chris Wilson
But at any rate, making this an engine->flag would eliminate one pointer
dance.
Could be an interesting idea for a separate patch.
To land first ;)
:)
Sure, I can do that.
Post by Chris Wilson
Post by Lis, Tomasz
Post by Chris Wilson
Post by Tomasz Lis
+ prio > max(rq_prio(last), 0);
}
/**
@@ -535,6 +538,25 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
}
+static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists *execlists = &engine->execlists;
+
+ GEM_TRACE("%s\n", engine->name);
+
+ /*
+ * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+ * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+ */
+ GEM_BUG_ON(execlists->ctrl_reg != NULL);
+
+ /* trigger preemption to idle */
+ writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
Future plans? Because just inserting the branch into the setter of
inject_preempt_context() resolves a lot of conflicts with other work.
- better code readability
- keeping the symmetry between execlist and GuC flow - GuC preemption
patches will introduce separate function as well
- only 4 lines of the function would be common
- the name inject_preempt_context() wouldn't match the new purpose, so
renaming would be needed
- reduced self-documenting code due to two separate preempt methods not
having distinct names
That's all, I don't have any future plans for it. If you want me to
merge the two, let me know.
The problem that I am worrying about is that we will duplicate bunch of
other code, the actual ELS[PQ] write is the smaller portion. Plus we
already have the branch on something much more pleasant.
I see. I don't know any details there, so I'm not able to weigh that.
Just let me know whether this possible duplication outweighs the
arguments I provided, and I will merge these functions.
I'm not overly attached to my solution.
Post by Chris Wilson
Post by Lis, Tomasz
Post by Chris Wilson
Post by Tomasz Lis
@@ -962,10 +987,13 @@ static void execlists_submission_tasklet(unsigned long data)
status, buf[2*head + 1],
execlists->active);
- if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
- GEN8_CTX_STATUS_PREEMPTED))
+ /* Check if switched to active or preempted to active */
+ if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+ GEN8_CTX_STATUS_PREEMPTED)) &&
+ !(status & GEN11_CTX_STATUS_PREEMPT_IDLE))
Setting HWACK here is harmless as it gets cleared again. Unless, there
is some oddity in the code flow.
I will check if lack of the change affects test results.
Personally, I would keep this change, even if only for allowing simple
definition of what HWACK flag means.
The simple definition is the opposite one, imo. We set the flag after we
get the corresponding response from HW; any preemption or activate event
must follow the most recent ELSP write. So that will include the
preemption event following the preempt-idle write.
Then on deciding that the HW is idle, we apply the complication such
that execlists->active == 0. (That rule is what breaks the pattern.)
-Chris
Ok, I will remove this unnecessary condition.
I tested this and lack of it doesn't seem to affect the results.
(I'll be out next week; expect v2 when I'm back)
-Tomasz
Daniele Ceraolo Spurio
2018-03-30 19:45:43 UTC
Permalink
Post by Lis, Tomasz
Post by Chris Wilson
Quoting Lis, Tomasz (2018-03-28 17:06:58)
Post by Lis, Tomasz
Post by Chris Wilson
Quoting Tomasz Lis (2018-03-27 16:17:59)
Post by Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.
Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.
This patch does not cover using the new preemption mechanism when GuC is
active.
Bspec: 18922
---
   drivers/gpu/drm/i915/i915_drv.h          |  2 ++
   drivers/gpu/drm/i915/i915_pci.c          |  3 ++-
   drivers/gpu/drm/i915/intel_device_info.h |  1 +
   drivers/gpu/drm/i915/intel_lrc.c         | 45
+++++++++++++++++++++++++++-----
   drivers/gpu/drm/i915/intel_lrc.h         |  1 +
   5 files changed, 45 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h
b/drivers/gpu/drm/i915/i915_drv.h
index 800230b..c32580b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private *dev_priv)
                  ((dev_priv)->info.has_logical_ring_elsq)
   #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
                  ((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+               ((dev_priv)->info.has_hw_preempt_to_idle)
   #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c
b/drivers/gpu/drm/i915/i915_pci.c
index 4364922..66b6700 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -595,7 +595,8 @@ static const struct intel_device_info
intel_cannonlake_info = {
          GEN(11), \
          .ddb_size = 2048, \
          .has_csr = 0, \
-       .has_logical_ring_elsq = 1
+       .has_logical_ring_elsq = 1, \
+       .has_hw_preempt_to_idle = 1
   static const struct intel_device_info intel_icelake_11_info = {
          GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h
b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
          func(has_logical_ring_contexts); \
          func(has_logical_ring_elsq); \
          func(has_logical_ring_preemption); \
+       func(has_hw_preempt_to_idle); \
          func(has_overlay); \
          func(has_pooled_eu); \
          func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c
b/drivers/gpu/drm/i915/intel_lrc.c
index ba7f783..1a22de4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -153,6 +153,7 @@
   #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
   #define GEN8_CTX_STATUS_COMPLETE       (1 << 4)
   #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
   #define GEN8_CTX_STATUS_COMPLETED_MASK \
           (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -183,7 +184,9 @@ static inline bool need_preempt(const struct
intel_engine_cs *engine,
                                  const struct i915_request *last,
                                  int prio)
   {
-       return engine->i915->preempt_context && prio >
max(rq_prio(last), 0);
+       return (engine->i915->preempt_context ||
+               HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
Well, you haven't actually disabled allocating the preempt_context so...
Yes.. I had mixed feelings about changing needs_preempt_context() now,
as that would mean adding a temporary condition on GuC until the GuC
preemption is merged.
I will add the conditions and disable the allocation in v2 of the patch.
Post by Chris Wilson
But at any rate, making this an engine->flag would eliminate one pointer
dance.
Could be an interesting idea for a separate patch.
To land first ;)
:)
Sure, I can do that.
Post by Chris Wilson
Post by Lis, Tomasz
Post by Chris Wilson
Post by Tomasz Lis
+                prio > max(rq_prio(last), 0);
   }
   /**
@@ -535,6 +538,25 @@ static void inject_preempt_context(struct
intel_engine_cs *engine)
          execlists_set_active(&engine->execlists,
EXECLISTS_ACTIVE_PREEMPT);
   }
+static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
+{
+       struct intel_engine_execlists *execlists = &engine->execlists;
+
+       GEM_TRACE("%s\n", engine->name);
+
+       /*
+        * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+        * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+        */
+       GEM_BUG_ON(execlists->ctrl_reg != NULL);
+
+       /* trigger preemption to idle */
+       writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
Future plans? Because just inserting the branch into the setter of
inject_preempt_context() resolves a lot of conflicts with other work.
- better code readability
- keeping the symmetry between execlist and GuC flow - GuC preemption
patches will introduce separate function as well
- only 4 lines of the function would be common
- the name inject_preempt_context() wouldn't match the new purpose, so
renaming would be needed
- reduced self-documenting code due to two separate preempt methods not
having distinct names
That's all, I don't have any future plans for it. If you want me to
merge the two, let me know.
The problem that I am worrying about is that we will duplicate bunch of
other code, the actual ELS[PQ] write is the smaller portion. Plus we
already have the branch on something much more pleasant.
I see. I don't know any details there, so I'm not able to weigh that.
Just let me know whether this possible duplication outweighs the
arguments I provided, and I will merge these functions.
I'm not overly attached to my solution.
Post by Chris Wilson
Post by Lis, Tomasz
Post by Chris Wilson
Post by Tomasz Lis
@@ -962,10 +987,13 @@ static void
execlists_submission_tasklet(unsigned long data)
                                    status, buf[2*head + 1],
                                    execlists->active);
-                       if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-                                     GEN8_CTX_STATUS_PREEMPTED))
+                       /* Check if switched to active or preempted
to active */
+                       if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+                                       GEN8_CTX_STATUS_PREEMPTED)) &&
+                           !(status & GEN11_CTX_STATUS_PREEMPT_IDLE))
Setting HWACK here is harmless as it gets cleared again. Unless, there
is some oddity in the code flow.
I will check if lack of the change affects test results.
Personally, I would keep this change, even if only for allowing simple
definition of what HWACK flag means.
The simple definition is the opposite one, imo. We set the flag after we
get the corresponding response from HW; any preemption or activate event
must follow the most recent ELSP write. So that will include the
preemption event following the preempt-idle write.
Then on deciding that the HW is idle, we apply the complication such
that execlists->active == 0. (That rule is what breaks the pattern.)
-Chris
Ok, I will remove this unnecessary condition.
I tested this and lack of it doesn't seem to affect the results.
(I'll be out next week; expect v2 when I'm back)
-Tomasz
Do we have any test to cover a preempt to idle on already idle HW (which
is the case we cover with this flag here)? If not maybe we could add a
selftest for that.

Daniele
Post by Lis, Tomasz
_______________________________________________
Intel-gfx mailing list
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Lis, Tomasz
2018-04-26 14:02:12 UTC
Permalink
Post by Daniele Ceraolo Spurio
Post by Lis, Tomasz
Post by Chris Wilson
Quoting Lis, Tomasz (2018-03-28 17:06:58)
Post by Lis, Tomasz
Post by Chris Wilson
Quoting Tomasz Lis (2018-03-27 16:17:59)
Post by Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.
Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.
This patch does not cover using the new preemption mechanism when GuC is
active.
Bspec: 18922
---
   drivers/gpu/drm/i915/i915_drv.h          |  2 ++
   drivers/gpu/drm/i915/i915_pci.c          |  3 ++-
   drivers/gpu/drm/i915/intel_device_info.h |  1 +
   drivers/gpu/drm/i915/intel_lrc.c         | 45
+++++++++++++++++++++++++++-----
   drivers/gpu/drm/i915/intel_lrc.h         |  1 +
   5 files changed, 45 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h
b/drivers/gpu/drm/i915/i915_drv.h
index 800230b..c32580b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private *dev_priv)
((dev_priv)->info.has_logical_ring_elsq)
   #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+ ((dev_priv)->info.has_hw_preempt_to_idle)
   #define HAS_EXECLISTS(dev_priv)
HAS_LOGICAL_RING_CONTEXTS(dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c
b/drivers/gpu/drm/i915/i915_pci.c
index 4364922..66b6700 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -595,7 +595,8 @@ static const struct intel_device_info
intel_cannonlake_info = {
          GEN(11), \
          .ddb_size = 2048, \
          .has_csr = 0, \
-       .has_logical_ring_elsq = 1
+       .has_logical_ring_elsq = 1, \
+       .has_hw_preempt_to_idle = 1
   static const struct intel_device_info intel_icelake_11_info = {
          GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h
b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
          func(has_logical_ring_contexts); \
          func(has_logical_ring_elsq); \
          func(has_logical_ring_preemption); \
+       func(has_hw_preempt_to_idle); \
          func(has_overlay); \
          func(has_pooled_eu); \
          func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c
b/drivers/gpu/drm/i915/intel_lrc.c
index ba7f783..1a22de4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -153,6 +153,7 @@
   #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
   #define GEN8_CTX_STATUS_COMPLETE       (1 << 4)
   #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
   #define GEN8_CTX_STATUS_COMPLETED_MASK \
           (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -183,7 +184,9 @@ static inline bool need_preempt(const struct
intel_engine_cs *engine,
                                  const struct i915_request *last,
                                  int prio)
   {
-       return engine->i915->preempt_context && prio >
max(rq_prio(last), 0);
+       return (engine->i915->preempt_context ||
+               HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
Well, you haven't actually disabled allocating the preempt_context so...
Yes.. I had mixed feelings about changing needs_preempt_context() now,
as that would mean adding a temporary condition on GuC until the GuC
preemption is merged.
I will add the conditions and disable the allocation in v2 of the patch.
Post by Chris Wilson
But at any rate, making this an engine->flag would eliminate one pointer
dance.
Could be an interesting idea for a separate patch.
To land first ;)
:)
Sure, I can do that.
Post by Chris Wilson
Post by Lis, Tomasz
Post by Chris Wilson
Post by Tomasz Lis
+                prio > max(rq_prio(last), 0);
   }
   /**
@@ -535,6 +538,25 @@ static void inject_preempt_context(struct
intel_engine_cs *engine)
execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
   }
+static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
+{
+       struct intel_engine_execlists *execlists =
&engine->execlists;
+
+       GEM_TRACE("%s\n", engine->name);
+
+       /*
+        * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+        * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+        */
+       GEM_BUG_ON(execlists->ctrl_reg != NULL);
+
+       /* trigger preemption to idle */
+       writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
Future plans? Because just inserting the branch into the setter of
inject_preempt_context() resolves a lot of conflicts with other work.
- better code readability
- keeping the symmetry between execlist and GuC flow - GuC preemption
patches will introduce separate function as well
- only 4 lines of the function would be common
- the name inject_preempt_context() wouldn't match the new purpose, so
renaming would be needed
- reduced self-documenting code due to two separate preempt methods not
having distinct names
That's all, I don't have any future plans for it. If you want me to
merge the two, let me know.
The problem that I am worrying about is that we will duplicate bunch of
other code, the actual ELS[PQ] write is the smaller portion. Plus we
already have the branch on something much more pleasant.
I see. I don't know any details there, so I'm not able to weigh that.
Just let me know whether this possible duplication outweighs the
arguments I provided, and I will merge these functions.
I'm not overly attached to my solution.
Post by Chris Wilson
Post by Lis, Tomasz
Post by Chris Wilson
Post by Tomasz Lis
@@ -962,10 +987,13 @@ static void
execlists_submission_tasklet(unsigned long data)
                                    status, buf[2*head + 1],
execlists->active);
-                       if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
- GEN8_CTX_STATUS_PREEMPTED))
+                       /* Check if switched to active or
preempted to active */
+                       if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+ GEN8_CTX_STATUS_PREEMPTED)) &&
+                           !(status &
GEN11_CTX_STATUS_PREEMPT_IDLE))
Setting HWACK here is harmless as it gets cleared again. Unless, there
is some oddity in the code flow.
I will check if lack of the change affects test results.
Personally, I would keep this change, even if only for allowing simple
definition of what HWACK flag means.
The simple definition is the opposite one, imo. We set the flag after we
get the corresponding response from HW; any preemption or activate event
must follow the most recent ELSP write. So that will include the
preemption event following the preempt-idle write.
Then on deciding that the HW is idle, we apply the complication such
that execlists->active == 0. (That rule is what breaks the pattern.)
-Chris
Ok, I will remove this unnecessary condition.
I tested this and lack of it doesn't seem to affect the results.
(I'll be out next week; expect v2 when I'm back)
-Tomasz
Do we have any test to cover a preempt to idle on already idle HW
(which is the case we cover with this flag here)? If not maybe we could
add a selftest for that.
Daniele
Looks like this case is not tested.
Also, it looks like there is a bug of some kind. Preemption-specific
tests are passing, but I'm getting fails (with occasional passes) in
smoketest-* cases from gem_exec_schedule.
I am working on diagnosing that.
Post by Daniele Ceraolo Spurio
Post by Lis, Tomasz
_______________________________________________
Intel-gfx mailing list
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Daniele Ceraolo Spurio
2018-03-30 18:23:15 UTC
Permalink
Post by Chris Wilson
Quoting Tomasz Lis (2018-03-27 16:17:59)
Post by Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.
Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.
This patch does not cover using the new preemption mechanism when GuC is
active.
Bspec: 18922
---
drivers/gpu/drm/i915/i915_drv.h | 2 ++
drivers/gpu/drm/i915/i915_pci.c | 3 ++-
drivers/gpu/drm/i915/intel_device_info.h | 1 +
drivers/gpu/drm/i915/intel_lrc.c | 45 +++++++++++++++++++++++++++-----
drivers/gpu/drm/i915/intel_lrc.h | 1 +
5 files changed, 45 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 800230b..c32580b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private *dev_priv)
((dev_priv)->info.has_logical_ring_elsq)
#define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+ ((dev_priv)->info.has_hw_preempt_to_idle)
#define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 4364922..66b6700 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
GEN(11), \
.ddb_size = 2048, \
.has_csr = 0, \
- .has_logical_ring_elsq = 1
+ .has_logical_ring_elsq = 1, \
+ .has_hw_preempt_to_idle = 1
static const struct intel_device_info intel_icelake_11_info = {
GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
func(has_logical_ring_contexts); \
func(has_logical_ring_elsq); \
func(has_logical_ring_preemption); \
+ func(has_hw_preempt_to_idle); \
func(has_overlay); \
func(has_pooled_eu); \
func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ba7f783..1a22de4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -153,6 +153,7 @@
#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
#define GEN8_CTX_STATUS_COMPLETE (1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE (1 << 29)
#define GEN8_CTX_STATUS_COMPLETED_MASK \
(GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -183,7 +184,9 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
const struct i915_request *last,
int prio)
{
- return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
+ return (engine->i915->preempt_context ||
+ HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
Well, you haven't actually disabled allocating the preempt_context so...
But at any rate, making this an engine->flag would eliminate one pointer
dance.
Can't we re-use I915_SCHEDULER_CAP_PREEMPTION in
engine->i915->caps.scheduler? That btw like here to be set if
i915->preempt_context || HAS_HW_PREEMPT_TO_IDLE(i915)
Post by Chris Wilson
Post by Tomasz Lis
+ prio > max(rq_prio(last), 0);
}
/**
@@ -535,6 +538,25 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
}
+static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists *execlists = &engine->execlists;
+
+ GEM_TRACE("%s\n", engine->name);
+
+ /*
+ * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+ * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+ */
+ GEM_BUG_ON(execlists->ctrl_reg != NULL);
Shouldn't this check be the other way around?
Post by Chris Wilson
Post by Tomasz Lis
+
+ /* trigger preemption to idle */
+ writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
Future plans? Because just inserting the branch into the setter of
inject_preempt_context() resolves a lot of conflicts with other work.
Post by Tomasz Lis
@@ -962,10 +987,13 @@ static void execlists_submission_tasklet(unsigned long data)
status, buf[2*head + 1],
execlists->active);
- if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
- GEN8_CTX_STATUS_PREEMPTED))
+ /* Check if switched to active or preempted to active */
+ if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+ GEN8_CTX_STATUS_PREEMPTED)) &&
+ !(status & GEN11_CTX_STATUS_PREEMPT_IDLE))
Setting HWACK here is harmless as it gets cleared again. Unless, there
is some oddity in the code flow.
There is actually some oddity, but it is more on the HW side. A preempt
to idle can potentially land on an already idle HW, in which case
GEN8_CTX_STATUS_ACTIVE_IDLE is not set and GEN8_CTX_STATUS_IDLE_ACTIVE
is set instead. In this case without this check on
GEN11_CTX_STATUS_PREEMPT_IDLE we would set the HWACK here but we
wouldn't call the clear below. Not sure if we end up clearing the flag
elsewhere, but that doesn't look too nice IMHO.

BTW, the relevant CSB bits coming out in the 2 preempt to idle cases are
as follows:

preempt active HW:
GEN11_CTX_STATUS_PREEMPT_IDLE | GEN8_CTX_STATUS_ACTIVE_IDLE |
GEN8_CTX_STATUS_PREEMPTED

Preempt idle HW:
GEN11_CTX_STATUS_PREEMPT_IDLE | GEN8_CTX_STATUS_IDLE_ACTIVE

Daniele
Post by Chris Wilson
Post by Tomasz Lis
execlists_set_active(execlists,
EXECLISTS_ACTIVE_HWACK);
+
if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
execlists_clear_active(execlists,
EXECLISTS_ACTIVE_HWACK);
@@ -976,8 +1004,13 @@ static void execlists_submission_tasklet(unsigned long data)
/* We should never get a COMPLETED | IDLE_ACTIVE! */
GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
- if (status & GEN8_CTX_STATUS_COMPLETE &&
- buf[2*head + 1] == execlists->preempt_complete_status) {
+ /*
+ * Check if preempted to real idle, either directly or
+ * the preemptive context already finished executing
+ */
+ if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+ (status & GEN8_CTX_STATUS_COMPLETE &&
+ buf[2*head + 1] == execlists->preempt_complete_status)) {
GEM_TRACE("%s preempt-idle\n", engine->name);
Hmm. I was hoping that we would be able to engineer a single check to
cover all sins. Might have been overly optimistic, but I can dream.
-Chris
_______________________________________________
Intel-gfx mailing list
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Lis, Tomasz
2018-04-12 17:15:24 UTC
Permalink
Post by Daniele Ceraolo Spurio
Post by Chris Wilson
Quoting Tomasz Lis (2018-03-27 16:17:59)
Post by Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.
Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.
This patch does not cover using the new preemption mechanism when GuC is
active.
Bspec: 18922
---
  drivers/gpu/drm/i915/i915_drv.h          |  2 ++
  drivers/gpu/drm/i915/i915_pci.c          |  3 ++-
  drivers/gpu/drm/i915/intel_device_info.h |  1 +
  drivers/gpu/drm/i915/intel_lrc.c         | 45
+++++++++++++++++++++++++++-----
  drivers/gpu/drm/i915/intel_lrc.h         |  1 +
  5 files changed, 45 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h
b/drivers/gpu/drm/i915/i915_drv.h
index 800230b..c32580b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private *dev_priv)
                 ((dev_priv)->info.has_logical_ring_elsq)
  #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+               ((dev_priv)->info.has_hw_preempt_to_idle)
    #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
  diff --git a/drivers/gpu/drm/i915/i915_pci.c
b/drivers/gpu/drm/i915/i915_pci.c
index 4364922..66b6700 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -595,7 +595,8 @@ static const struct intel_device_info
intel_cannonlake_info = {
         GEN(11), \
         .ddb_size = 2048, \
         .has_csr = 0, \
-       .has_logical_ring_elsq = 1
+       .has_logical_ring_elsq = 1, \
+       .has_hw_preempt_to_idle = 1
    static const struct intel_device_info intel_icelake_11_info = {
         GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h
b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
         func(has_logical_ring_contexts); \
         func(has_logical_ring_elsq); \
         func(has_logical_ring_preemption); \
+       func(has_hw_preempt_to_idle); \
         func(has_overlay); \
         func(has_pooled_eu); \
         func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c
b/drivers/gpu/drm/i915/intel_lrc.c
index ba7f783..1a22de4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -153,6 +153,7 @@
  #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
  #define GEN8_CTX_STATUS_COMPLETE       (1 << 4)
  #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
    #define GEN8_CTX_STATUS_COMPLETED_MASK \
          (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -183,7 +184,9 @@ static inline bool need_preempt(const struct
intel_engine_cs *engine,
                                 const struct i915_request *last,
                                 int prio)
  {
-       return engine->i915->preempt_context && prio >
max(rq_prio(last), 0);
+       return (engine->i915->preempt_context ||
+               HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
Well, you haven't actually disabled allocating the preempt_context so...
But at any rate, making this an engine->flag would eliminate one pointer
dance.
Can't we re-use I915_SCHEDULER_CAP_PREEMPTION in
engine->i915->caps.scheduler? That btw like here to be set if
i915->preempt_context || HAS_HW_PREEMPT_TO_IDLE(i915)
The engine->flag which Chris introduced is now used to set
I915_SCHEDULER_CAP_PREEMPTION.
Post by Daniele Ceraolo Spurio
Post by Chris Wilson
Post by Tomasz Lis
+                prio > max(rq_prio(last), 0);
  }
    /**
@@ -535,6 +538,25 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
         execlists_set_active(&engine->execlists,
EXECLISTS_ACTIVE_PREEMPT);
  }
  +static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
+{
+       struct intel_engine_execlists *execlists = &engine->execlists;
+
+       GEM_TRACE("%s\n", engine->name);
+
+       /*
+        * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+        * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+        */
+       GEM_BUG_ON(execlists->ctrl_reg != NULL);
Shouldn't this check be the other way around?
Wow. I have no idea how I was able to test this patch and not trigger
this. You are right.
Post by Daniele Ceraolo Spurio
Post by Chris Wilson
Post by Tomasz Lis
+
+       /* trigger preemption to idle */
+       writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
Future plans? Because just inserting the branch into the setter of
inject_preempt_context() resolves a lot of conflicts with other work.
Post by Tomasz Lis
@@ -962,10 +987,13 @@ static void
execlists_submission_tasklet(unsigned long data)
                                   status, buf[2*head + 1],
                                   execlists->active);
  -                       if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
- GEN8_CTX_STATUS_PREEMPTED))
+                       /* Check if switched to active or preempted
to active */
+                       if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+ GEN8_CTX_STATUS_PREEMPTED)) &&
+                           !(status & GEN11_CTX_STATUS_PREEMPT_IDLE))
Setting HWACK here is harmless as it gets cleared again. Unless, there
is some oddity in the code flow.
There is actually some oddity, but it is more on the HW side. A
preempt to idle can potentially land on an already idle HW, in which
case GEN8_CTX_STATUS_ACTIVE_IDLE is not set and
GEN8_CTX_STATUS_IDLE_ACTIVE is set instead. In this case without this
check on GEN11_CTX_STATUS_PREEMPT_IDLE we would set the HWACK here but
we wouldn't call the clear below. Not sure if we end up clearing the
flag elsewhere, but that doesn't look too nice IMHO.
BTW, the relevant CSB bits coming out in the 2 preempt to idle cases
GEN11_CTX_STATUS_PREEMPT_IDLE | GEN8_CTX_STATUS_ACTIVE_IDLE |
GEN8_CTX_STATUS_PREEMPTED
GEN11_CTX_STATUS_PREEMPT_IDLE | GEN8_CTX_STATUS_IDLE_ACTIVE
Daniele
Thanks Daniele, this makes things a lot clearer.
Considering also HWACK description from Chris, I will add a condition to
execlists_clear_active() below instead of  here.
-Tomasz
Post by Daniele Ceraolo Spurio
Post by Chris Wilson
Post by Tomasz Lis
execlists_set_active(execlists,
EXECLISTS_ACTIVE_HWACK);
+
                         if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
execlists_clear_active(execlists,
EXECLISTS_ACTIVE_HWACK);
@@ -976,8 +1004,13 @@ static void
execlists_submission_tasklet(unsigned long data)
                         /* We should never get a COMPLETED |
IDLE_ACTIVE! */
                         GEM_BUG_ON(status &
GEN8_CTX_STATUS_IDLE_ACTIVE);
  -                       if (status & GEN8_CTX_STATUS_COMPLETE &&
-                           buf[2*head + 1] ==
execlists->preempt_complete_status) {
+                       /*
+                        * Check if preempted to real idle, either
directly or
+                        * the preemptive context already finished
executing
+                        */
+                       if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+                           (status & GEN8_CTX_STATUS_COMPLETE &&
+                           buf[2*head + 1] ==
execlists->preempt_complete_status)) {
                                 GEM_TRACE("%s preempt-idle\n",
engine->name);
Hmm. I was hoping that we would be able to engineer a single check to
cover all sins. Might have been overly optimistic, but I can dream.
-Chris
_______________________________________________
Intel-gfx mailing list
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Tomasz Lis
2018-04-19 11:44:48 UTC
Permalink
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.

Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

v2: Added needs_preempt_context() change so that it is not created when
preempt-to-idle is supported. (Chris)
Updated setting HWACK flag so that it is cleared after
preempt-to-idle. (Chris, Daniele)
Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)

Bspec: 18922
Signed-off-by: Tomasz Lis <***@intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 2 ++
drivers/gpu/drm/i915/i915_gem_context.c | 4 ++-
drivers/gpu/drm/i915/i915_pci.c | 3 +-
drivers/gpu/drm/i915/intel_device_info.h | 1 +
drivers/gpu/drm/i915/intel_lrc.c | 47 ++++++++++++++++++++++++++++----
drivers/gpu/drm/i915/intel_lrc.h | 1 +
6 files changed, 51 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0286911..f445340 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2518,6 +2518,8 @@ intel_info(const struct drm_i915_private *dev_priv)
((dev_priv)->info.has_logical_ring_elsq)
#define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+ ((dev_priv)->info.has_hw_preempt_to_idle)

#define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 74435af..d65f469 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -454,7 +454,9 @@ destroy_kernel_context(struct i915_gem_context **ctxp)

static bool needs_preempt_context(struct drm_i915_private *i915)
{
- return HAS_LOGICAL_RING_PREEMPTION(i915);
+ return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+ !HAS_HW_PREEMPT_TO_IDLE(i915) &&
+ !USES_GUC_SUBMISSION(i915);
}

int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 4364922..66b6700 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
GEN(11), \
.ddb_size = 2048, \
.has_csr = 0, \
- .has_logical_ring_elsq = 1
+ .has_logical_ring_elsq = 1, \
+ .has_hw_preempt_to_idle = 1

static const struct intel_device_info intel_icelake_11_info = {
GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
func(has_logical_ring_contexts); \
func(has_logical_ring_elsq); \
func(has_logical_ring_preemption); \
+ func(has_hw_preempt_to_idle); \
func(has_overlay); \
func(has_pooled_eu); \
func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 029901a..4c94488 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -154,6 +154,7 @@
#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
#define GEN8_CTX_STATUS_COMPLETE (1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE (1 << 29)

#define GEN8_CTX_STATUS_COMPLETED_MASK \
(GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -552,6 +553,25 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
}

+static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists *execlists = &engine->execlists;
+
+ GEM_TRACE("%s\n", engine->name);
+
+ /*
+ * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+ * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+ */
+ GEM_BUG_ON(execlists->ctrl_reg == NULL);
+
+ /* trigger preemption to idle */
+ writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+
+ execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
+ execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
+}
+
static void execlists_dequeue(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -611,7 +631,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
goto unlock;

if (need_preempt(engine, last, execlists->queue_priority)) {
- inject_preempt_context(engine);
+ if (HAS_HW_PREEMPT_TO_IDLE(engine->i915))
+ gen11_preempt_to_idle(engine);
+ else
+ inject_preempt_context(engine);
goto unlock;
}

@@ -1010,7 +1033,15 @@ static void execlists_submission_tasklet(unsigned long data)
GEN8_CTX_STATUS_PREEMPTED))
execlists_set_active(execlists,
EXECLISTS_ACTIVE_HWACK);
- if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+
+ /*
+ * Check if switched to idle or preempted to idle.
+ * The STATUS_IDLE_ACTIVE flag is really used to mark
+ * preemption from idle to idle, this is not a mistake.
+ */
+ if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) ||
+ ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+ (status & GEN11_CTX_STATUS_PREEMPT_IDLE)))
execlists_clear_active(execlists,
EXECLISTS_ACTIVE_HWACK);

@@ -1020,8 +1051,13 @@ static void execlists_submission_tasklet(unsigned long data)
/* We should never get a COMPLETED | IDLE_ACTIVE! */
GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);

- if (status & GEN8_CTX_STATUS_COMPLETE &&
- buf[2*head + 1] == execlists->preempt_complete_status) {
+ /*
+ * Check if preempted to real idle, either directly or
+ * the preemptive context already finished executing
+ */
+ if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+ (status & GEN8_CTX_STATUS_COMPLETE &&
+ buf[2*head + 1] == execlists->preempt_complete_status)) {
GEM_TRACE("%s preempt-idle\n", engine->name);

execlists_cancel_port_requests(execlists);
@@ -2157,7 +2193,8 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
engine->unpark = NULL;

engine->flags |= I915_ENGINE_SUPPORTS_STATS;
- if (engine->i915->preempt_context)
+ if (engine->i915->preempt_context ||
+ HAS_HW_PREEMPT_TO_IDLE(engine->i915))
engine->flags |= I915_ENGINE_HAS_PREEMPTION;

engine->i915->caps.scheduler =
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 59d7b86..958d1b3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -45,6 +45,7 @@
#define RING_EXECLIST_SQ_CONTENTS(engine) _MMIO((engine)->mmio_base + 0x510)
#define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base + 0x550)
#define EL_CTRL_LOAD (1 << 0)
+#define EL_CTRL_PREEMPT_TO_IDLE (1 << 1)

/* The docs specify that the write pointer wraps around after 5h, "After status
* is written out to the last available status QW at offset 5h, this pointer
--
2.7.4
Chris Wilson
2018-04-19 12:00:24 UTC
Permalink
Quoting Tomasz Lis (2018-04-19 12:44:48)
Post by Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.
Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.
This patch does not cover using the new preemption mechanism when GuC is
active.
v2: Added needs_preempt_context() change so that it is not created when
preempt-to-idle is supported. (Chris)
Updated setting HWACK flag so that it is cleared after
preempt-to-idle. (Chris, Daniele)
Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
Bspec: 18922
---
drivers/gpu/drm/i915/i915_drv.h | 2 ++
drivers/gpu/drm/i915/i915_gem_context.c | 4 ++-
drivers/gpu/drm/i915/i915_pci.c | 3 +-
drivers/gpu/drm/i915/intel_device_info.h | 1 +
drivers/gpu/drm/i915/intel_lrc.c | 47 ++++++++++++++++++++++++++++----
drivers/gpu/drm/i915/intel_lrc.h | 1 +
6 files changed, 51 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0286911..f445340 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2518,6 +2518,8 @@ intel_info(const struct drm_i915_private *dev_priv)
((dev_priv)->info.has_logical_ring_elsq)
#define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+ ((dev_priv)->info.has_hw_preempt_to_idle)
#define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 74435af..d65f469 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -454,7 +454,9 @@ destroy_kernel_context(struct i915_gem_context **ctxp)
static bool needs_preempt_context(struct drm_i915_private *i915)
{
- return HAS_LOGICAL_RING_PREEMPTION(i915);
+ return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+ !HAS_HW_PREEMPT_TO_IDLE(i915) &&
+ !USES_GUC_SUBMISSION(i915);
Pardon? The guc uses the preempt_context for its preempt_client.
Post by Tomasz Lis
int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 4364922..66b6700 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
GEN(11), \
.ddb_size = 2048, \
.has_csr = 0, \
- .has_logical_ring_elsq = 1
+ .has_logical_ring_elsq = 1, \
+ .has_hw_preempt_to_idle = 1
static const struct intel_device_info intel_icelake_11_info = {
GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
func(has_logical_ring_contexts); \
func(has_logical_ring_elsq); \
func(has_logical_ring_preemption); \
+ func(has_hw_preempt_to_idle); \
func(has_overlay); \
func(has_pooled_eu); \
func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 029901a..4c94488 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -154,6 +154,7 @@
#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
#define GEN8_CTX_STATUS_COMPLETE (1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE (1 << 29)
#define GEN8_CTX_STATUS_COMPLETED_MASK \
(GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -552,6 +553,25 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
}
+static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists *execlists = &engine->execlists;
+
+ GEM_TRACE("%s\n", engine->name);
+
+ /*
+ * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+ * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+ */
+ GEM_BUG_ON(execlists->ctrl_reg == NULL);
+
+ /* trigger preemption to idle */
+ writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+
+ execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
+ execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
+}
+
static void execlists_dequeue(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -611,7 +631,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
goto unlock;
if (need_preempt(engine, last, execlists->queue_priority)) {
- inject_preempt_context(engine);
+ if (HAS_HW_PREEMPT_TO_IDLE(engine->i915))
+ gen11_preempt_to_idle(engine);
+ else
+ inject_preempt_context(engine);
Please do move this to inject_preempt_context. The conflict with other
work in flight is not worth the hassle, especially to reiterate such
points as we already have the equivalent machine check and so avoid
repeating it in even more pointer dancing.
Post by Tomasz Lis
goto unlock;
}
@@ -1010,7 +1033,15 @@ static void execlists_submission_tasklet(unsigned long data)
GEN8_CTX_STATUS_PREEMPTED))
execlists_set_active(execlists,
EXECLISTS_ACTIVE_HWACK);
- if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+
+ /*
+ * Check if switched to idle or preempted to idle.
+ * The STATUS_IDLE_ACTIVE flag is really used to mark
+ * preemption from idle to idle, this is not a mistake.
+ */
+ if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) ||
+ ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+ (status & GEN11_CTX_STATUS_PREEMPT_IDLE)))
execlists_clear_active(execlists,
EXECLISTS_ACTIVE_HWACK);
But still pointless, no?
Post by Tomasz Lis
@@ -1020,8 +1051,13 @@ static void execlists_submission_tasklet(unsigned long data)
/* We should never get a COMPLETED | IDLE_ACTIVE! */
GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
- if (status & GEN8_CTX_STATUS_COMPLETE &&
- buf[2*head + 1] == execlists->preempt_complete_status) {
+ /*
+ * Check if preempted to real idle, either directly or
+ * the preemptive context already finished executing
+ */
+ if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+ (status & GEN8_CTX_STATUS_COMPLETE &&
+ buf[2*head + 1] == execlists->preempt_complete_status)) {
GEM_TRACE("%s preempt-idle\n", engine->name);
execlists_cancel_port_requests(execlists);
@@ -2157,7 +2193,8 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
engine->unpark = NULL;
engine->flags |= I915_ENGINE_SUPPORTS_STATS;
- if (engine->i915->preempt_context)
+ if (engine->i915->preempt_context ||
+ HAS_HW_PREEMPT_TO_IDLE(engine->i915))
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
-Chris
Daniele Ceraolo Spurio
2018-04-19 22:23:29 UTC
Permalink
<snip>
Post by Chris Wilson
Post by Tomasz Lis
@@ -1010,7 +1033,15 @@ static void execlists_submission_tasklet(unsigned long data)
GEN8_CTX_STATUS_PREEMPTED))
execlists_set_active(execlists,
EXECLISTS_ACTIVE_HWACK);
- if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+
+ /*
+ * Check if switched to idle or preempted to idle.
+ * The STATUS_IDLE_ACTIVE flag is really used to mark
+ * preemption from idle to idle, this is not a mistake.
+ */
+ if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) ||
+ ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+ (status & GEN11_CTX_STATUS_PREEMPT_IDLE)))
execlists_clear_active(execlists,
EXECLISTS_ACTIVE_HWACK);
But still pointless, no?
Just to understand, is it pointless because we have a preemption in
flight and we're thus going to call execlists_dequeue below, which will
eventually clear the flag in execlists_submit_ports? Or do we just don't
care if this gets cleared here because we always clear it before a write
to the elsp and we're only interested in it being clear between the
write and the subsequent csb event?

Also, now that I think about it, with the current flow it doesn't look
like we would clear EXECLISTS_ACTIVE_PREEMPT if a preempt-to-idle
happens on idle HW, so we still need a condition for that even if we
drop the one for EXECLISTS_ACTIVE_HWACK.

Daniele
Post by Chris Wilson
Post by Tomasz Lis
@@ -1020,8 +1051,13 @@ static void execlists_submission_tasklet(unsigned long data)
/* We should never get a COMPLETED | IDLE_ACTIVE! */
GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
- if (status & GEN8_CTX_STATUS_COMPLETE &&
- buf[2*head + 1] == execlists->preempt_complete_status) {
+ /*
+ * Check if preempted to real idle, either directly or
+ * the preemptive context already finished executing
+ */
+ if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+ (status & GEN8_CTX_STATUS_COMPLETE &&
+ buf[2*head + 1] == execlists->preempt_complete_status)) {
GEM_TRACE("%s preempt-idle\n", engine->name);
execlists_cancel_port_requests(execlists);
@@ -2157,7 +2193,8 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
engine->unpark = NULL;
engine->flags |= I915_ENGINE_SUPPORTS_STATS;
- if (engine->i915->preempt_context)
+ if (engine->i915->preempt_context ||
+ HAS_HW_PREEMPT_TO_IDLE(engine->i915))
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
-Chris
Patchwork
2018-04-19 11:58:29 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev2)
URL : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
8e4bae99c558 drm/i915/gen11: Preempt-to-idle support in execlists.
-:107: CHECK:COMPARISON_TO_NULL: Comparison to NULL could be written "!execlists->ctrl_reg"
#107: FILE: drivers/gpu/drm/i915/intel_lrc.c:566:
+ GEM_BUG_ON(execlists->ctrl_reg == NULL);

-:160: CHECK:SPACING: spaces preferred around that '*' (ctx:VxV)
#160: FILE: drivers/gpu/drm/i915/intel_lrc.c:1060:
+ buf[2*head + 1] == execlists->preempt_complete_status)) {
^

total: 0 errors, 0 warnings, 2 checks, 124 lines checked
Patchwork
2018-04-19 11:59:14 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev2)
URL : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Commit: drm/i915/gen11: Preempt-to-idle support in execlists.
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3656:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3658:16: warning: expression using sizeof(void)
Patchwork
2018-04-19 12:13:41 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev2)
URL : https://patchwork.freedesktop.org/series/40747/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4068 -> Patchwork_8751 =

== Summary - SUCCESS ==

No regressions found.

External URL: https://patchwork.freedesktop.org/api/1.0/series/40747/revisions/2/mbox/

== Known issues ==

Here are the changes found in Patchwork_8751 that come from known issues:

=== IGT changes ===

==== Issues hit ====

***@gem_exec_suspend@basic-s4-devices:
fi-kbl-7500u: PASS -> DMESG-WARN (fdo#105128)

***@kms_pipe_crc_basic@suspend-read-crc-pipe-c:
fi-ivb-3520m: PASS -> DMESG-WARN (fdo#106084)

***@prime_vgem@basic-gtt:
fi-glk-1: NOTRUN -> INCOMPLETE (k.org#198133, fdo#103359)


fdo#103359 https://bugs.freedesktop.org/show_bug.cgi?id=103359
fdo#105128 https://bugs.freedesktop.org/show_bug.cgi?id=105128
fdo#106084 https://bugs.freedesktop.org/show_bug.cgi?id=106084
k.org#198133 https://bugzilla.kernel.org/show_bug.cgi?id=198133


== Participating hosts (34 -> 32) ==

Additional (1): fi-glk-1
Missing (3): fi-ctg-p8600 fi-ilk-m540 fi-skl-6700hq


== Build changes ==

* Linux: CI_DRM_4068 -> Patchwork_8751

CI_DRM_4068: 28fecc12e5c2b1beb9ab89e3616266d5d5e58e3d @ git://anongit.freedesktop.org/gfx-ci/linux
IGT_4441: 83ba5b7d3bde48b383df41792fc9c955a5a23bdb @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
Patchwork_8751: 8e4bae99c5587cb819b3ebb7a22dd8d75883be1b @ git://anongit.freedesktop.org/gfx-ci/linux
piglit_4441: e60d247eb359f044caf0c09904da14e39d7adca1 @ git://anongit.freedesktop.org/piglit


== Linux commits ==

8e4bae99c558 drm/i915/gen11: Preempt-to-idle support in execlists.

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8751/issues.html
Patchwork
2018-04-19 16:08:31 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev2)
URL : https://patchwork.freedesktop.org/series/40747/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4068_full -> Patchwork_8751_full =

== Summary - WARNING ==

Minor unknown changes coming with Patchwork_8751_full need to be verified
manually.

If you think the reported changes have nothing to do with the changes
introduced in Patchwork_8751_full, please notify your bug team to allow them
to document this new failure mode, which will reduce false positives in CI.

External URL: https://patchwork.freedesktop.org/api/1.0/series/40747/revisions/2/mbox/

== Possible new issues ==

Here are the unknown changes that may have been introduced in Patchwork_8751_full:

=== IGT changes ===

==== Warnings ====

***@gem_mmap_wc@set-cache-level:
shard-glk: PASS -> SKIP +71

***@gem_mocs_settings@mocs-rc6-bsd1:
shard-kbl: SKIP -> PASS

***@kms_mmap_write_crc:
shard-glk: SKIP -> PASS +93

***@perf_pmu@rc6:
shard-kbl: PASS -> SKIP


== Known issues ==

Here are the changes found in Patchwork_8751_full that come from known issues:

=== IGT changes ===

==== Issues hit ====

***@kms_cursor_legacy@flip-vs-cursor-toggle:
shard-hsw: PASS -> FAIL (fdo#102670) +1

***@kms_flip@2x-flip-vs-dpms-interruptible:
shard-hsw: PASS -> DMESG-WARN (fdo#102614)

***@kms_flip@flip-vs-wf_vblank-interruptible:
shard-glk: SKIP -> FAIL (fdo#100368)

***@kms_flip@modeset-vs-vblank-race-interruptible:
shard-hsw: PASS -> FAIL (fdo#103060)

***@kms_flip@wf_vblank-ts-check-interruptible:
shard-apl: PASS -> FAIL (fdo#100368)


==== Possible fixes ====

***@gem_ppgtt@blt-vs-render-ctx0:
shard-kbl: INCOMPLETE (fdo#106023, fdo#103665) -> PASS

***@kms_flip@2x-wf_vblank-ts-check:
shard-hsw: FAIL (fdo#100368) -> PASS

***@kms_flip@plain-flip-ts-check-interruptible:
shard-glk: FAIL (fdo#100368) -> PASS +2

***@kms_hdmi_inject@inject-audio:
shard-glk: FAIL (fdo#102370) -> PASS

***@kms_setmode@basic:
shard-glk: FAIL (fdo#99912) -> PASS


fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#102370 https://bugs.freedesktop.org/show_bug.cgi?id=102370
fdo#102614 https://bugs.freedesktop.org/show_bug.cgi?id=102614
fdo#102670 https://bugs.freedesktop.org/show_bug.cgi?id=102670
fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
fdo#103665 https://bugs.freedesktop.org/show_bug.cgi?id=103665
fdo#106023 https://bugs.freedesktop.org/show_bug.cgi?id=106023
fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912


== Participating hosts (6 -> 5) ==

Missing (1): shard-glkb


== Build changes ==

* Linux: CI_DRM_4068 -> Patchwork_8751

CI_DRM_4068: 28fecc12e5c2b1beb9ab89e3616266d5d5e58e3d @ git://anongit.freedesktop.org/gfx-ci/linux
IGT_4441: 83ba5b7d3bde48b383df41792fc9c955a5a23bdb @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
Patchwork_8751: 8e4bae99c5587cb819b3ebb7a22dd8d75883be1b @ git://anongit.freedesktop.org/gfx-ci/linux
piglit_4441: e60d247eb359f044caf0c09904da14e39d7adca1 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8751/shards.html
Patchwork
2018-05-11 16:16:20 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev3)
URL : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Commit: drm/i915/gen11: Preempt-to-idle support in execlists.
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3663:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3665:16: warning: expression using sizeof(void)
Patchwork
2018-05-11 16:15:35 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev3)
URL : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
3492dcf9f1e4 drm/i915/gen11: Preempt-to-idle support in execlists.
-:132: CHECK:COMPARISON_TO_NULL: Comparison to NULL could be written "!execlists->ctrl_reg"
#132: FILE: drivers/gpu/drm/i915/intel_lrc.c:542:
+ GEM_BUG_ON(execlists->ctrl_reg == NULL);

-:185: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#185: FILE: drivers/gpu/drm/i915/intel_lrc.c:1074:
+ if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+ (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {

-:186: CHECK:BRACES: Blank lines aren't necessary after an open brace '{'
#186: FILE: drivers/gpu/drm/i915/intel_lrc.c:1075:
+ (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
+

-:199: CHECK:BRACES: Unbalanced braces around else statement
#199: FILE: drivers/gpu/drm/i915/intel_lrc.c:1086:
+ else {

-:203: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#203: FILE: drivers/gpu/drm/i915/intel_lrc.c:1090:
+ execlists_set_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);

-:207: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#207: FILE: drivers/gpu/drm/i915/intel_lrc.c:1094:
+ execlists_clear_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);

-:229: CHECK:SPACING: spaces preferred around that '*' (ctx:VxV)
#229: FILE: drivers/gpu/drm/i915/intel_lrc.c:1112:
+ buf[2*head + 1] == execlists->preempt_complete_status)) {
^

total: 0 errors, 0 warnings, 7 checks, 190 lines checked
Patchwork
2018-05-11 17:46:40 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev3)
URL : https://patchwork.freedesktop.org/series/40747/
State : failure

== Summary ==

= CI Bug Log - changes from CI_DRM_4169_full -> Patchwork_8983_full =

== Summary - FAILURE ==

Serious unknown changes coming with Patchwork_8983_full absolutely need to be
verified manually.

If you think the reported changes have nothing to do with the changes
introduced in Patchwork_8983_full, please notify your bug team to allow them
to document this new failure mode, which will reduce false positives in CI.

External URL: https://patchwork.freedesktop.org/api/1.0/series/40747/revisions/3/mbox/

== Possible new issues ==

Here are the unknown changes that may have been introduced in Patchwork_8983_full:

=== IGT changes ===

==== Possible regressions ====

***@perf_pmu@interrupts-sync:
shard-kbl: PASS -> FAIL


==== Warnings ====

***@gem_pwrite@big-cpu-random:
shard-kbl: SKIP -> PASS


== Known issues ==

Here are the changes found in Patchwork_8983_full that come from known issues:

=== IGT changes ===

==== Issues hit ====

***@kms_concurrent@pipe-c:
shard-hsw: PASS -> DMESG-WARN (fdo#102614)

***@kms_flip@absolute-wf_vblank-interruptible:
shard-glk: PASS -> FAIL (fdo#106087)

***@kms_flip@flip-vs-wf_vblank-interruptible:
shard-glk: PASS -> FAIL (fdo#105312)

***@kms_flip@plain-flip-ts-check-interruptible:
shard-glk: PASS -> FAIL (fdo#100368) +1

***@kms_pipe_crc_basic@nonblocking-crc-pipe-c-frame-sequence:
shard-hsw: PASS -> FAIL (fdo#103481)


==== Possible fixes ====

***@gem_eio@in-flight-10ms:
shard-glk: FAIL (fdo#105957) -> PASS

***@gem_ppgtt@blt-vs-render-ctx0:
shard-kbl: INCOMPLETE (fdo#106023, fdo#103665) -> PASS

***@kms_chv_cursor_fail@pipe-b-64x64-top-edge:
shard-apl: FAIL (fdo#104671, fdo#104724) -> PASS

***@kms_flip@flip-vs-expired-vblank:
shard-glk: FAIL (fdo#102887) -> PASS

***@kms_flip@flip-vs-panning-vs-hang:
shard-snb: DMESG-WARN (fdo#103821) -> PASS

***@kms_flip@modeset-vs-vblank-race-interruptible:
shard-hsw: FAIL (fdo#103060) -> PASS

***@kms_flip@plain-flip-fb-recreate:
shard-glk: FAIL (fdo#100368) -> PASS +1

***@kms_sysfs_edid_timing:
shard-apl: WARN (fdo#100047) -> PASS


fdo#100047 https://bugs.freedesktop.org/show_bug.cgi?id=100047
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#102614 https://bugs.freedesktop.org/show_bug.cgi?id=102614
fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887
fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
fdo#103481 https://bugs.freedesktop.org/show_bug.cgi?id=103481
fdo#103665 https://bugs.freedesktop.org/show_bug.cgi?id=103665
fdo#103821 https://bugs.freedesktop.org/show_bug.cgi?id=103821
fdo#104671 https://bugs.freedesktop.org/show_bug.cgi?id=104671
fdo#104724 https://bugs.freedesktop.org/show_bug.cgi?id=104724
fdo#105312 https://bugs.freedesktop.org/show_bug.cgi?id=105312
fdo#105957 https://bugs.freedesktop.org/show_bug.cgi?id=105957
fdo#106023 https://bugs.freedesktop.org/show_bug.cgi?id=106023
fdo#106087 https://bugs.freedesktop.org/show_bug.cgi?id=106087


== Participating hosts (5 -> 5) ==

No changes in participating hosts


== Build changes ==

* Linux: CI_DRM_4169 -> Patchwork_8983

CI_DRM_4169: 05bfe2ceaa9df8f56313507ae01344971fa4f8f4 @ git://anongit.freedesktop.org/gfx-ci/linux
IGT_4475: 35f08c12aa216d5b62a5b9984b575cee6905098f @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
Patchwork_8983: 3492dcf9f1e429e4bd7fe2b95c5f5a912f5a4ade @ git://anongit.freedesktop.org/gfx-ci/linux
piglit_4475: 3ba0657bff4216d1ec7179935590261855f1651e @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8983/shards.html
Tomasz Lis
2018-05-11 15:45:22 UTC
Permalink
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.

Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

v2: Added needs_preempt_context() change so that it is not created when
preempt-to-idle is supported. (Chris)
Updated setting HWACK flag so that it is cleared after
preempt-to-idle. (Chris, Daniele)
Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)

v3: Fixed needs_preempt_context() change. (Chris)
Merged preemption trigger functions to one. (Chris)
Fixed context state to not assume COMPLETED_MASK after preemption,
since idle-to-idle case will not have it set.

Bspec: 18922
Signed-off-by: Tomasz Lis <***@intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 2 +
drivers/gpu/drm/i915/i915_gem_context.c | 5 +-
drivers/gpu/drm/i915/i915_pci.c | 3 +-
drivers/gpu/drm/i915/intel_device_info.h | 1 +
drivers/gpu/drm/i915/intel_lrc.c | 115 ++++++++++++++++++++++---------
drivers/gpu/drm/i915/intel_lrc.h | 1 +
6 files changed, 92 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 57fb3aa..6e9647b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2535,6 +2535,8 @@ intel_info(const struct drm_i915_private *dev_priv)
((dev_priv)->info.has_logical_ring_elsq)
#define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+ ((dev_priv)->info.has_hw_preempt_to_idle)

#define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 33f8a4b..bdac129 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -454,7 +454,10 @@ destroy_kernel_context(struct i915_gem_context **ctxp)

static bool needs_preempt_context(struct drm_i915_private *i915)
{
- return HAS_LOGICAL_RING_PREEMPTION(i915);
+ return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+ (!HAS_HW_PREEMPT_TO_IDLE(i915) ||
+ (HAS_HW_PREEMPT_TO_IDLE(i915) &&
+ !USES_GUC_SUBMISSION(i915)));
}

int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 4364922..66b6700 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
GEN(11), \
.ddb_size = 2048, \
.has_csr = 0, \
- .has_logical_ring_elsq = 1
+ .has_logical_ring_elsq = 1, \
+ .has_hw_preempt_to_idle = 1

static const struct intel_device_info intel_icelake_11_info = {
GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
func(has_logical_ring_contexts); \
func(has_logical_ring_elsq); \
func(has_logical_ring_preemption); \
+ func(has_hw_preempt_to_idle); \
func(has_overlay); \
func(has_pooled_eu); \
func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 29dcf34..8fe6795 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -154,6 +154,7 @@
#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
#define GEN8_CTX_STATUS_COMPLETE (1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE (1 << 29)

#define GEN8_CTX_STATUS_COMPLETED_MASK \
(GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -526,31 +527,49 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq)
static void inject_preempt_context(struct intel_engine_cs *engine)
{
struct intel_engine_execlists *execlists = &engine->execlists;
- struct intel_context *ce =
- to_intel_context(engine->i915->preempt_context, engine);
- unsigned int n;

- GEM_BUG_ON(execlists->preempt_complete_status !=
- upper_32_bits(ce->lrc_desc));
- GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
- CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
- CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
+ if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
+ /*
+ * If we have hardware preempt-to-idle, we do not need to
+ * inject any job to the hardware. We only set a flag.
+ */
+ GEM_TRACE("%s\n", engine->name);

- /*
- * Switch to our empty preempt context so
- * the state of the GPU is known (idle).
- */
- GEM_TRACE("%s\n", engine->name);
- for (n = execlists_num_ports(execlists); --n; )
- write_desc(execlists, 0, n);
+ /*
+ * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+ * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+ */
+ GEM_BUG_ON(execlists->ctrl_reg == NULL);

- write_desc(execlists, ce->lrc_desc, n);
+ /* trigger preemption to idle */
+ writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+ } else {
+ struct intel_context *ce =
+ to_intel_context(engine->i915->preempt_context, engine);
+ unsigned int n;

- /* we need to manually load the submit queue */
- if (execlists->ctrl_reg)
- writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+ GEM_BUG_ON(execlists->preempt_complete_status !=
+ upper_32_bits(ce->lrc_desc));
+ GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+ CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+ CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
+
+ /*
+ * Switch to our empty preempt context so
+ * the state of the GPU is known (idle).
+ */
+ GEM_TRACE("%s\n", engine->name);
+ for (n = execlists_num_ports(execlists); --n; )
+ write_desc(execlists, 0, n);
+
+ write_desc(execlists, ce->lrc_desc, n);
+
+ /* we need to manually load the submit queue */
+ if (execlists->ctrl_reg)
+ writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+ }

execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
@@ -1045,22 +1064,51 @@ static void execlists_submission_tasklet(unsigned long data)
status, buf[2*head + 1],
execlists->active);

- if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
- GEN8_CTX_STATUS_PREEMPTED))
- execlists_set_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
- if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+ /*
+ * Check if preempted from idle to idle directly.
+ * The STATUS_IDLE_ACTIVE flag is used to mark
+ * such transition.
+ */
+ if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+ (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
+
execlists_clear_active(execlists,
EXECLISTS_ACTIVE_HWACK);

- if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
- continue;
+ /*
+ * We could not have COMPLETED anything
+ * if we were idle before preemption.
+ */
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
+ }
+
+ else {
+ if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+ GEN8_CTX_STATUS_PREEMPTED))
+ execlists_set_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);
+
+ if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+ execlists_clear_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);

- /* We should never get a COMPLETED | IDLE_ACTIVE! */
- GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+ if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+ continue;

- if (status & GEN8_CTX_STATUS_COMPLETE &&
- buf[2*head + 1] == execlists->preempt_complete_status) {
+ /*
+ * We should never get a
+ * COMPLETED | IDLE_ACTIVE!
+ */
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+ }
+
+ /*
+ * Check if preempted to real idle, either directly or
+ * the preemptive context already finished executing
+ */
+ if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+ (status & GEN8_CTX_STATUS_COMPLETE &&
+ buf[2*head + 1] == execlists->preempt_complete_status)) {
GEM_TRACE("%s preempt-idle\n", engine->name);

execlists_cancel_port_requests(execlists);
@@ -2217,7 +2265,8 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
engine->unpark = NULL;

engine->flags |= I915_ENGINE_SUPPORTS_STATS;
- if (engine->i915->preempt_context)
+ if (engine->i915->preempt_context ||
+ HAS_HW_PREEMPT_TO_IDLE(engine->i915))
engine->flags |= I915_ENGINE_HAS_PREEMPTION;

engine->i915->caps.scheduler =
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 4ec7d8d..b1083ac 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -45,6 +45,7 @@
#define RING_EXECLIST_SQ_CONTENTS(engine) _MMIO((engine)->mmio_base + 0x510)
#define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base + 0x550)
#define EL_CTRL_LOAD (1 << 0)
+#define EL_CTRL_PREEMPT_TO_IDLE (1 << 1)

/* The docs specify that the write pointer wraps around after 5h, "After status
* is written out to the last available status QW at offset 5h, this pointer
--
2.7.4
Daniele Ceraolo Spurio
2018-05-18 21:08:38 UTC
Permalink
Post by Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.
Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.
This patch does not cover using the new preemption mechanism when GuC is
active.
v2: Added needs_preempt_context() change so that it is not created when
preempt-to-idle is supported. (Chris)
Updated setting HWACK flag so that it is cleared after
preempt-to-idle. (Chris, Daniele)
Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
v3: Fixed needs_preempt_context() change. (Chris)
Merged preemption trigger functions to one. (Chris)
Fixed context state to not assume COMPLETED_MASK after preemption,
since idle-to-idle case will not have it set.
Bspec: 18922
---
drivers/gpu/drm/i915/i915_drv.h | 2 +
drivers/gpu/drm/i915/i915_gem_context.c | 5 +-
drivers/gpu/drm/i915/i915_pci.c | 3 +-
drivers/gpu/drm/i915/intel_device_info.h | 1 +
drivers/gpu/drm/i915/intel_lrc.c | 115 ++++++++++++++++++++++---------
drivers/gpu/drm/i915/intel_lrc.h | 1 +
6 files changed, 92 insertions(+), 35 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 57fb3aa..6e9647b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2535,6 +2535,8 @@ intel_info(const struct drm_i915_private *dev_priv)
((dev_priv)->info.has_logical_ring_elsq)
#define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+ ((dev_priv)->info.has_hw_preempt_to_idle)
#define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 33f8a4b..bdac129 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -454,7 +454,10 @@ destroy_kernel_context(struct i915_gem_context **ctxp)
static bool needs_preempt_context(struct drm_i915_private *i915)
{
- return HAS_LOGICAL_RING_PREEMPTION(i915);
+ return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+ (!HAS_HW_PREEMPT_TO_IDLE(i915) ||
+ (HAS_HW_PREEMPT_TO_IDLE(i915) &&
+ !USES_GUC_SUBMISSION(i915)));
Why do we keep the preempt context for !USES_GUC_SUBMISSION(i915) even
if HAS_HW_PREEMPT_TO_IDLE(i915)? After this patch we shouldn't need it
anymore, right?
Post by Tomasz Lis
}
int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 4364922..66b6700 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
GEN(11), \
.ddb_size = 2048, \
.has_csr = 0, \
- .has_logical_ring_elsq = 1
+ .has_logical_ring_elsq = 1, \
+ .has_hw_preempt_to_idle = 1
static const struct intel_device_info intel_icelake_11_info = {
GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
func(has_logical_ring_contexts); \
func(has_logical_ring_elsq); \
func(has_logical_ring_preemption); \
+ func(has_hw_preempt_to_idle); \
func(has_overlay); \
func(has_pooled_eu); \
func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 29dcf34..8fe6795 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -154,6 +154,7 @@
#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
#define GEN8_CTX_STATUS_COMPLETE (1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE (1 << 29)
#define GEN8_CTX_STATUS_COMPLETED_MASK \
(GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -526,31 +527,49 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq)
static void inject_preempt_context(struct intel_engine_cs *engine)
For gen11+ we don't inject a preempt context anymore, maybe we can
rename this function to something like "inject_preempt()".
Post by Tomasz Lis
{
struct intel_engine_execlists *execlists = &engine->execlists;
- struct intel_context *ce =
- to_intel_context(engine->i915->preempt_context, engine);
- unsigned int n;
- GEM_BUG_ON(execlists->preempt_complete_status !=
- upper_32_bits(ce->lrc_desc));
- GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
- CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
- CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
+ if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
+ /*
+ * If we have hardware preempt-to-idle, we do not need to
+ * inject any job to the hardware. We only set a flag.
+ */
+ GEM_TRACE("%s\n", engine->name);
This trace is in both conditional branches, might be cleaner to just put
it before the if statement.
Post by Tomasz Lis
- /*
- * Switch to our empty preempt context so
- * the state of the GPU is known (idle).
- */
- GEM_TRACE("%s\n", engine->name);
- for (n = execlists_num_ports(execlists); --n; )
- write_desc(execlists, 0, n);
+ /*
+ * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+ * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+ */
+ GEM_BUG_ON(execlists->ctrl_reg == NULL);
- write_desc(execlists, ce->lrc_desc, n);
+ /* trigger preemption to idle */
+ writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+ } else {
+ struct intel_context *ce =
+ to_intel_context(engine->i915->preempt_context, engine);
+ unsigned int n;
- /* we need to manually load the submit queue */
- if (execlists->ctrl_reg)
- writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+ GEM_BUG_ON(execlists->preempt_complete_status !=
+ upper_32_bits(ce->lrc_desc));
+ GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+ CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+ CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
+
+ /*
+ * Switch to our empty preempt context so
+ * the state of the GPU is known (idle).
+ */
+ GEM_TRACE("%s\n", engine->name);
+ for (n = execlists_num_ports(execlists); --n; )
+ write_desc(execlists, 0, n);
+
+ write_desc(execlists, ce->lrc_desc, n);
+
+ /* we need to manually load the submit queue */
+ if (execlists->ctrl_reg)
+ writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+ }
execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
@@ -1045,22 +1064,51 @@ static void execlists_submission_tasklet(unsigned long data)
status, buf[2*head + 1],
execlists->active);
- if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
- GEN8_CTX_STATUS_PREEMPTED))
- execlists_set_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
- if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+ /*
+ * Check if preempted from idle to idle directly.
+ * The STATUS_IDLE_ACTIVE flag is used to mark
+ * such transition.
+ */
+ if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+ (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
+
execlists_clear_active(execlists,
EXECLISTS_ACTIVE_HWACK);
EXECLISTS_ACTIVE_HWACK should be already clear here (we clear it both
when we inject the pre-emption and on the previous A->I CSB event), so
there should be no need to clear it.
Post by Tomasz Lis
- if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
- continue;
+ /*
+ * We could not have COMPLETED anything
+ * if we were idle before preemption.
+ */
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
+ }
+
+ else {
nitpick: formatting is wrong here.

Daniele
Post by Tomasz Lis
+ if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+ GEN8_CTX_STATUS_PREEMPTED))
+ execlists_set_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);
+
+ if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+ execlists_clear_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);
- /* We should never get a COMPLETED | IDLE_ACTIVE! */
- GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+ if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+ continue;
- if (status & GEN8_CTX_STATUS_COMPLETE &&
- buf[2*head + 1] == execlists->preempt_complete_status) {
+ /*
+ * We should never get a
+ * COMPLETED | IDLE_ACTIVE!
+ */
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+ }
+
+ /*
+ * Check if preempted to real idle, either directly or
+ * the preemptive context already finished executing
+ */
+ if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+ (status & GEN8_CTX_STATUS_COMPLETE &&
+ buf[2*head + 1] == execlists->preempt_complete_status)) {
GEM_TRACE("%s preempt-idle\n", engine->name);
execlists_cancel_port_requests(execlists);
@@ -2217,7 +2265,8 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
engine->unpark = NULL;
engine->flags |= I915_ENGINE_SUPPORTS_STATS;
- if (engine->i915->preempt_context)
+ if (engine->i915->preempt_context ||
+ HAS_HW_PREEMPT_TO_IDLE(engine->i915))
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
engine->i915->caps.scheduler =
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 4ec7d8d..b1083ac 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -45,6 +45,7 @@
#define RING_EXECLIST_SQ_CONTENTS(engine) _MMIO((engine)->mmio_base + 0x510)
#define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base + 0x550)
#define EL_CTRL_LOAD (1 << 0)
+#define EL_CTRL_PREEMPT_TO_IDLE (1 << 1)
/* The docs specify that the write pointer wraps around after 5h, "After status
* is written out to the last available status QW at offset 5h, this pointer
Lis, Tomasz
2018-05-21 10:16:04 UTC
Permalink
Post by Daniele Ceraolo Spurio
Post by Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.
Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.
This patch does not cover using the new preemption mechanism when GuC is
active.
v2: Added needs_preempt_context() change so that it is not created when
     preempt-to-idle is supported. (Chris)
     Updated setting HWACK flag so that it is cleared after
     preempt-to-idle. (Chris, Daniele)
     Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
v3: Fixed needs_preempt_context() change. (Chris)
     Merged preemption trigger functions to one. (Chris)
     Fixed context state to not assume COMPLETED_MASK after preemption,
     since idle-to-idle case will not have it set.
Bspec: 18922
---
  drivers/gpu/drm/i915/i915_drv.h          |   2 +
  drivers/gpu/drm/i915/i915_gem_context.c  |   5 +-
  drivers/gpu/drm/i915/i915_pci.c          |   3 +-
  drivers/gpu/drm/i915/intel_device_info.h |   1 +
  drivers/gpu/drm/i915/intel_lrc.c         | 115
++++++++++++++++++++++---------
  drivers/gpu/drm/i915/intel_lrc.h         |   1 +
  6 files changed, 92 insertions(+), 35 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h
b/drivers/gpu/drm/i915/i915_drv.h
index 57fb3aa..6e9647b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2535,6 +2535,8 @@ intel_info(const struct drm_i915_private *dev_priv)
          ((dev_priv)->info.has_logical_ring_elsq)
  #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
          ((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+        ((dev_priv)->info.has_hw_preempt_to_idle)
    #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
  diff --git a/drivers/gpu/drm/i915/i915_gem_context.c
b/drivers/gpu/drm/i915/i915_gem_context.c
index 33f8a4b..bdac129 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -454,7 +454,10 @@ destroy_kernel_context(struct i915_gem_context **ctxp)
    static bool needs_preempt_context(struct drm_i915_private *i915)
  {
-    return HAS_LOGICAL_RING_PREEMPTION(i915);
+    return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+           (!HAS_HW_PREEMPT_TO_IDLE(i915) ||
+        (HAS_HW_PREEMPT_TO_IDLE(i915) &&
+        !USES_GUC_SUBMISSION(i915)));
Why do we keep the preempt context for !USES_GUC_SUBMISSION(i915) even
if HAS_HW_PREEMPT_TO_IDLE(i915)? After this patch we shouldn't need it
anymore, right?
The patch only provides gen11 way for the non-GuC submission. This is
why the condition is so convoluted - preempt_context is still needed if
we use GuC.
This will be simplified after the GuC patches are added.
Post by Daniele Ceraolo Spurio
Post by Tomasz Lis
  }
    int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c
b/drivers/gpu/drm/i915/i915_pci.c
index 4364922..66b6700 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -595,7 +595,8 @@ static const struct intel_device_info
intel_cannonlake_info = {
      GEN(11), \
      .ddb_size = 2048, \
      .has_csr = 0, \
-    .has_logical_ring_elsq = 1
+    .has_logical_ring_elsq = 1, \
+    .has_hw_preempt_to_idle = 1
    static const struct intel_device_info intel_icelake_11_info = {
      GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h
b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
      func(has_logical_ring_contexts); \
      func(has_logical_ring_elsq); \
      func(has_logical_ring_preemption); \
+    func(has_hw_preempt_to_idle); \
      func(has_overlay); \
      func(has_pooled_eu); \
      func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c
b/drivers/gpu/drm/i915/intel_lrc.c
index 29dcf34..8fe6795 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -154,6 +154,7 @@
  #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
  #define GEN8_CTX_STATUS_COMPLETE    (1 << 4)
  #define GEN8_CTX_STATUS_LITE_RESTORE    (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE    (1 << 29)
    #define GEN8_CTX_STATUS_COMPLETED_MASK \
       (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -526,31 +527,49 @@ static void port_assign(struct execlist_port
*port, struct i915_request *rq)
  static void inject_preempt_context(struct intel_engine_cs *engine)
For gen11+ we don't inject a preempt context anymore, maybe we can
rename this function to something like "inject_preempt()".
My initial approach was to just add a second function. Merging the
changes to inject_preempt_context() was requested by Chris; as I
understand it is to minimize refactoring in other work in progress.
Post by Daniele Ceraolo Spurio
Post by Tomasz Lis
  {
      struct intel_engine_execlists *execlists = &engine->execlists;
-    struct intel_context *ce =
-        to_intel_context(engine->i915->preempt_context, engine);
-    unsigned int n;
  -    GEM_BUG_ON(execlists->preempt_complete_status !=
-           upper_32_bits(ce->lrc_desc));
-    GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-                       CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-                      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
+    if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
+        /*
+         * If we have hardware preempt-to-idle, we do not need to
+         * inject any job to the hardware. We only set a flag.
+         */
+        GEM_TRACE("%s\n", engine->name);
This trace is in both conditional branches, might be cleaner to just
put it before the if statement.
True, I did not differentiated the messages. Will put before.
Post by Daniele Ceraolo Spurio
Post by Tomasz Lis
  -    /*
-     * Switch to our empty preempt context so
-     * the state of the GPU is known (idle).
-     */
-    GEM_TRACE("%s\n", engine->name);
-    for (n = execlists_num_ports(execlists); --n; )
-        write_desc(execlists, 0, n);
+        /*
+         * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+         * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+         */
+        GEM_BUG_ON(execlists->ctrl_reg == NULL);
  -    write_desc(execlists, ce->lrc_desc, n);
+        /* trigger preemption to idle */
+        writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+    } else {
+        struct intel_context *ce =
+ to_intel_context(engine->i915->preempt_context, engine);
+        unsigned int n;
  -    /* we need to manually load the submit queue */
-    if (execlists->ctrl_reg)
-        writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+        GEM_BUG_ON(execlists->preempt_complete_status !=
+               upper_32_bits(ce->lrc_desc));
+        GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+                      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+                      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
+
+        /*
+         * Switch to our empty preempt context so
+         * the state of the GPU is known (idle).
+         */
+        GEM_TRACE("%s\n", engine->name);
+        for (n = execlists_num_ports(execlists); --n; )
+            write_desc(execlists, 0, n);
+
+        write_desc(execlists, ce->lrc_desc, n);
+
+        /* we need to manually load the submit queue */
+        if (execlists->ctrl_reg)
+            writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+    }
        execlists_clear_active(&engine->execlists,
EXECLISTS_ACTIVE_HWACK);
      execlists_set_active(&engine->execlists,
EXECLISTS_ACTIVE_PREEMPT);
@@ -1045,22 +1064,51 @@ static void
execlists_submission_tasklet(unsigned long data)
                    status, buf[2*head + 1],
                    execlists->active);
  -            if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-                      GEN8_CTX_STATUS_PREEMPTED))
-                execlists_set_active(execlists,
-                             EXECLISTS_ACTIVE_HWACK);
-            if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+            /*
+             * Check if preempted from idle to idle directly.
+             * The STATUS_IDLE_ACTIVE flag is used to mark
+             * such transition.
+             */
+            if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+                 (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
+
                  execlists_clear_active(execlists,
                                 EXECLISTS_ACTIVE_HWACK);
EXECLISTS_ACTIVE_HWACK should be already clear here (we clear it both
when we inject the pre-emption and on the previous A->I CSB event), so
there should be no need to clear it.
This is a complex case; optimizations here may lead to errors later.
But I agree - since this block is only entered on idle-to-idle
preemption, and setting the flag can only happen when hardware is not
idle, we should never see the ACTIVE_HWACK flag set here.
I will change it to GEM_BUG_ON(), unless I get errors for it
during testing.
Post by Daniele Ceraolo Spurio
Post by Tomasz Lis
  -            if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
-                continue;
+                /*
+                 * We could not have COMPLETED anything
+                 * if we were idle before preemption.
+                 */
+                GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
+            }
+
+            else {
nitpick: formatting is wrong here.
ack.
Post by Daniele Ceraolo Spurio
Daniele
Post by Tomasz Lis
+                if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+                          GEN8_CTX_STATUS_PREEMPTED))
+                    execlists_set_active(execlists,
+                               EXECLISTS_ACTIVE_HWACK);
+
+                if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+                    execlists_clear_active(execlists,
+                               EXECLISTS_ACTIVE_HWACK);
  -            /* We should never get a COMPLETED | IDLE_ACTIVE! */
-            GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+                if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+                    continue;
  -            if (status & GEN8_CTX_STATUS_COMPLETE &&
-                buf[2*head + 1] ==
execlists->preempt_complete_status) {
+                /*
+                 * We should never get a
+                 * COMPLETED | IDLE_ACTIVE!
+                 */
+                GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+            }
+
+            /*
+             * Check if preempted to real idle, either directly or
+             * the preemptive context already finished executing
+             */
+            if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+                (status & GEN8_CTX_STATUS_COMPLETE &&
+                buf[2*head + 1] ==
execlists->preempt_complete_status)) {
                  GEM_TRACE("%s preempt-idle\n", engine->name);
                    execlists_cancel_port_requests(execlists);
@@ -2217,7 +2265,8 @@ static void
execlists_set_default_submission(struct intel_engine_cs *engine)
      engine->unpark = NULL;
        engine->flags |= I915_ENGINE_SUPPORTS_STATS;
-    if (engine->i915->preempt_context)
+    if (engine->i915->preempt_context ||
+        HAS_HW_PREEMPT_TO_IDLE(engine->i915))
          engine->flags |= I915_ENGINE_HAS_PREEMPTION;
        engine->i915->caps.scheduler =
diff --git a/drivers/gpu/drm/i915/intel_lrc.h
b/drivers/gpu/drm/i915/intel_lrc.h
index 4ec7d8d..b1083ac 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -45,6 +45,7 @@
  #define RING_EXECLIST_SQ_CONTENTS(engine) _MMIO((engine)->mmio_base
+ 0x510)
  #define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base +
0x550)
  #define      EL_CTRL_LOAD                (1 << 0)
+#define      EL_CTRL_PREEMPT_TO_IDLE        (1 << 1)
    /* The docs specify that the write pointer wraps around after 5h,
"After status
   * is written out to the last available status QW at offset 5h,
this pointer
Ceraolo Spurio, Daniele
2018-05-22 14:39:26 UTC
Permalink
Post by Lis, Tomasz
Post by Daniele Ceraolo Spurio
Post by Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.
Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.
This patch does not cover using the new preemption mechanism when GuC is
active.
v2: Added needs_preempt_context() change so that it is not created when
     preempt-to-idle is supported. (Chris)
     Updated setting HWACK flag so that it is cleared after
     preempt-to-idle. (Chris, Daniele)
     Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
v3: Fixed needs_preempt_context() change. (Chris)
     Merged preemption trigger functions to one. (Chris)
     Fixed context state to not assume COMPLETED_MASK after preemption,
     since idle-to-idle case will not have it set.
Bspec: 18922
---
  drivers/gpu/drm/i915/i915_drv.h          |   2 +
  drivers/gpu/drm/i915/i915_gem_context.c  |   5 +-
  drivers/gpu/drm/i915/i915_pci.c          |   3 +-
  drivers/gpu/drm/i915/intel_device_info.h |   1 +
  drivers/gpu/drm/i915/intel_lrc.c         | 115
++++++++++++++++++++++---------
  drivers/gpu/drm/i915/intel_lrc.h         |   1 +
  6 files changed, 92 insertions(+), 35 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h
b/drivers/gpu/drm/i915/i915_drv.h
index 57fb3aa..6e9647b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2535,6 +2535,8 @@ intel_info(const struct drm_i915_private *dev_priv)
          ((dev_priv)->info.has_logical_ring_elsq)
  #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
          ((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+        ((dev_priv)->info.has_hw_preempt_to_idle)
    #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
  diff --git a/drivers/gpu/drm/i915/i915_gem_context.c
b/drivers/gpu/drm/i915/i915_gem_context.c
index 33f8a4b..bdac129 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -454,7 +454,10 @@ destroy_kernel_context(struct i915_gem_context **ctxp)
    static bool needs_preempt_context(struct drm_i915_private *i915)
  {
-    return HAS_LOGICAL_RING_PREEMPTION(i915);
+    return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+           (!HAS_HW_PREEMPT_TO_IDLE(i915) ||
+        (HAS_HW_PREEMPT_TO_IDLE(i915) &&
+        !USES_GUC_SUBMISSION(i915)));
Why do we keep the preempt context for !USES_GUC_SUBMISSION(i915) even
if HAS_HW_PREEMPT_TO_IDLE(i915)? After this patch we shouldn't need it
anymore, right?
The patch only provides gen11 way for the non-GuC submission. This is
why the condition is so convoluted - preempt_context is still needed if
we use GuC.
This will be simplified after GuC patches are added.
mmm I think this check is the other way around because it returns true
when HAS_HW_PREEMPT_TO_IDLE for !USES_GUC_SUBMISSION, so when GuC is not
in use. BTW, GuC does not support using the preempt context on platforms
that have HW supported preempt-to-idle, so there is no need to keep the
preempt context around for GuC.
Post by Lis, Tomasz
Post by Daniele Ceraolo Spurio
Post by Tomasz Lis
  }
    int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c
b/drivers/gpu/drm/i915/i915_pci.c
index 4364922..66b6700 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -595,7 +595,8 @@ static const struct intel_device_info
intel_cannonlake_info = {
      GEN(11), \
      .ddb_size = 2048, \
      .has_csr = 0, \
-    .has_logical_ring_elsq = 1
+    .has_logical_ring_elsq = 1, \
+    .has_hw_preempt_to_idle = 1
    static const struct intel_device_info intel_icelake_11_info = {
      GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h
b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
      func(has_logical_ring_contexts); \
      func(has_logical_ring_elsq); \
      func(has_logical_ring_preemption); \
+    func(has_hw_preempt_to_idle); \
      func(has_overlay); \
      func(has_pooled_eu); \
      func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c
b/drivers/gpu/drm/i915/intel_lrc.c
index 29dcf34..8fe6795 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -154,6 +154,7 @@
  #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
  #define GEN8_CTX_STATUS_COMPLETE    (1 << 4)
  #define GEN8_CTX_STATUS_LITE_RESTORE    (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE    (1 << 29)
    #define GEN8_CTX_STATUS_COMPLETED_MASK \
       (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -526,31 +527,49 @@ static void port_assign(struct execlist_port
*port, struct i915_request *rq)
  static void inject_preempt_context(struct intel_engine_cs *engine)
For gen11+ we don't inject a preempt context anymore, maybe we can
rename this function to something like "inject_preempt()".
My initial approach was to just add a second function. Merging the
changes to inject_preempt_context() was requested by Chris; as I
understand it is to minimize refactoring in other work in progress.
Post by Daniele Ceraolo Spurio
Post by Tomasz Lis
  {
      struct intel_engine_execlists *execlists = &engine->execlists;
-    struct intel_context *ce =
-        to_intel_context(engine->i915->preempt_context, engine);
-    unsigned int n;
  -    GEM_BUG_ON(execlists->preempt_complete_status !=
-           upper_32_bits(ce->lrc_desc));
-    GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-                       CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-                      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
+    if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
+        /*
+         * If we have hardware preempt-to-idle, we do not need to
+         * inject any job to the hardware. We only set a flag.
+         */
+        GEM_TRACE("%s\n", engine->name);
This trace is in both conditional branches, might be cleaner to just
put it before the if statement.
True, I did not differentiate the messages. Will put it before.
Post by Daniele Ceraolo Spurio
Post by Tomasz Lis
  -    /*
-     * Switch to our empty preempt context so
-     * the state of the GPU is known (idle).
-     */
-    GEM_TRACE("%s\n", engine->name);
-    for (n = execlists_num_ports(execlists); --n; )
-        write_desc(execlists, 0, n);
+        /*
+         * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+         * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+         */
+        GEM_BUG_ON(execlists->ctrl_reg == NULL);
  -    write_desc(execlists, ce->lrc_desc, n);
+        /* trigger preemption to idle */
+        writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+    } else {
+        struct intel_context *ce =
+ to_intel_context(engine->i915->preempt_context, engine);
+        unsigned int n;
  -    /* we need to manually load the submit queue */
-    if (execlists->ctrl_reg)
-        writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+        GEM_BUG_ON(execlists->preempt_complete_status !=
+               upper_32_bits(ce->lrc_desc));
+        GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+                      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+                      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
+
+        /*
+         * Switch to our empty preempt context so
+         * the state of the GPU is known (idle).
+         */
+        GEM_TRACE("%s\n", engine->name);
+        for (n = execlists_num_ports(execlists); --n; )
+            write_desc(execlists, 0, n);
+
+        write_desc(execlists, ce->lrc_desc, n);
+
+        /* we need to manually load the submit queue */
+        if (execlists->ctrl_reg)
+            writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+    }
        execlists_clear_active(&engine->execlists,
EXECLISTS_ACTIVE_HWACK);
      execlists_set_active(&engine->execlists,
EXECLISTS_ACTIVE_PREEMPT);
@@ -1045,22 +1064,51 @@ static void
execlists_submission_tasklet(unsigned long data)
                    status, buf[2*head + 1],
                    execlists->active);
  -            if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-                      GEN8_CTX_STATUS_PREEMPTED))
-                execlists_set_active(execlists,
-                             EXECLISTS_ACTIVE_HWACK);
-            if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+            /*
+             * Check if preempted from idle to idle directly.
+             * The STATUS_IDLE_ACTIVE flag is used to mark
+             * such transition.
+             */
+            if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+                 (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
+
                  execlists_clear_active(execlists,
                                 EXECLISTS_ACTIVE_HWACK);
EXECLISTS_ACTIVE_HWACK should be already clear here (we clear it both
when we inject the pre-emption and on the previous A->I CSB event), so
there should be no need to clear it.
This is a complex case; optimizations here may lead to errors later.
But I agree - since this block is only entered on idle-to-idle
preemption, and setting the flag can only happen when hardware is not
idle, we should never see the ACTIVE_HWACK flag set here.
I will change it to GEM_BUG_ON(), unless I will get any errors in
testing that.
I'm not sure we actually need to care at all about
EXECLISTS_ACTIVE_HWACK here. From what I can see that is only used to
make sure we don't submit while the execlists HW is loading the current
submission. In this case however we're sure no submissions are occurring
because EXECLISTS_ACTIVE_PREEMPT is set, so we're already guarded.

Daniele
Post by Lis, Tomasz
Post by Daniele Ceraolo Spurio
Post by Tomasz Lis
  -            if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
-                continue;
+                /*
+                 * We could not have COMPLETED anything
+                 * if we were idle before preemption.
+                 */
+                GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
+            }
+
+            else {
nitpick: formatting is wrong here.
ack.
Post by Daniele Ceraolo Spurio
Daniele
Post by Tomasz Lis
+                if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+                          GEN8_CTX_STATUS_PREEMPTED))
+                    execlists_set_active(execlists,
+                               EXECLISTS_ACTIVE_HWACK);
+
+                if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+                    execlists_clear_active(execlists,
+                               EXECLISTS_ACTIVE_HWACK);
  -            /* We should never get a COMPLETED | IDLE_ACTIVE! */
-            GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+                if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+                    continue;
  -            if (status & GEN8_CTX_STATUS_COMPLETE &&
-                buf[2*head + 1] ==
execlists->preempt_complete_status) {
+                /*
+                 * We should never get a
+                 * COMPLETED | IDLE_ACTIVE!
+                 */
+                GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+            }
+
+            /*
+             * Check if preempted to real idle, either directly or
+             * the preemptive context already finished executing
+             */
+            if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+                (status & GEN8_CTX_STATUS_COMPLETE &&
+                buf[2*head + 1] ==
execlists->preempt_complete_status)) {
                  GEM_TRACE("%s preempt-idle\n", engine->name);
                    execlists_cancel_port_requests(execlists);
@@ -2217,7 +2265,8 @@ static void
execlists_set_default_submission(struct intel_engine_cs *engine)
      engine->unpark = NULL;
        engine->flags |= I915_ENGINE_SUPPORTS_STATS;
-    if (engine->i915->preempt_context)
+    if (engine->i915->preempt_context ||
+        HAS_HW_PREEMPT_TO_IDLE(engine->i915))
          engine->flags |= I915_ENGINE_HAS_PREEMPTION;
        engine->i915->caps.scheduler =
diff --git a/drivers/gpu/drm/i915/intel_lrc.h
b/drivers/gpu/drm/i915/intel_lrc.h
index 4ec7d8d..b1083ac 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -45,6 +45,7 @@
  #define RING_EXECLIST_SQ_CONTENTS(engine) _MMIO((engine)->mmio_base
+ 0x510)
  #define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base +
0x550)
  #define      EL_CTRL_LOAD                (1 << 0)
+#define      EL_CTRL_PREEMPT_TO_IDLE        (1 << 1)
    /* The docs specify that the write pointer wraps around after 5h,
"After status
   * is written out to the last available status QW at offset 5h,
this pointer
Lis, Tomasz
2018-05-22 14:54:56 UTC
Permalink
Post by Ceraolo Spurio, Daniele
Post by Lis, Tomasz
Post by Daniele Ceraolo Spurio
Post by Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.
Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.
This patch does not cover using the new preemption mechanism when GuC is
active.
v2: Added needs_preempt_context() change so that it is not created when
     preempt-to-idle is supported. (Chris)
     Updated setting HWACK flag so that it is cleared after
     preempt-to-idle. (Chris, Daniele)
     Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
v3: Fixed needs_preempt_context() change. (Chris)
     Merged preemption trigger functions to one. (Chris)
     Fixed context state to not assume COMPLETED_MASK after preemption,
     since idle-to-idle case will not have it set.
Bspec: 18922
---
  drivers/gpu/drm/i915/i915_drv.h          |   2 +
  drivers/gpu/drm/i915/i915_gem_context.c  |   5 +-
  drivers/gpu/drm/i915/i915_pci.c          |   3 +-
  drivers/gpu/drm/i915/intel_device_info.h |   1 +
  drivers/gpu/drm/i915/intel_lrc.c         | 115
++++++++++++++++++++++---------
  drivers/gpu/drm/i915/intel_lrc.h         |   1 +
  6 files changed, 92 insertions(+), 35 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h
b/drivers/gpu/drm/i915/i915_drv.h
index 57fb3aa..6e9647b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2535,6 +2535,8 @@ intel_info(const struct drm_i915_private *dev_priv)
          ((dev_priv)->info.has_logical_ring_elsq)
  #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
          ((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+        ((dev_priv)->info.has_hw_preempt_to_idle)
    #define HAS_EXECLISTS(dev_priv)
HAS_LOGICAL_RING_CONTEXTS(dev_priv)
  diff --git a/drivers/gpu/drm/i915/i915_gem_context.c
b/drivers/gpu/drm/i915/i915_gem_context.c
index 33f8a4b..bdac129 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -454,7 +454,10 @@ destroy_kernel_context(struct i915_gem_context **ctxp)
    static bool needs_preempt_context(struct drm_i915_private *i915)
  {
-    return HAS_LOGICAL_RING_PREEMPTION(i915);
+    return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+           (!HAS_HW_PREEMPT_TO_IDLE(i915) ||
+        (HAS_HW_PREEMPT_TO_IDLE(i915) &&
+        !USES_GUC_SUBMISSION(i915)));
Why do we keep the preempt context for !USES_GUC_SUBMISSION(i915)
even if HAS_HW_PREEMPT_TO_IDLE(i915)? After this patch we shouldn't
need it anymore, right?
The patch only provides gen11 way for the non-GuC submission. This is
why the condition is so convoluted - preempt_context is still needed
if we use GuC.
This will be simplified after GuC patches are added.
mmm I think this check is the other way around because it returns true
when HAS_HW_PREEMPT_TO_IDLE for !USES_GUC_SUBMISSION, so when GuC is
not in use.
Yes, agreed. USES_GUC_SUBMISSION should not be negated.
Post by Ceraolo Spurio, Daniele
BTW, GuC does not support using the preempt context on platforms that
have HW supported preempt-to-idle, so there is no need to keep the
preempt context around for GuC.
Oh, I did not know that. So the preemption is completely disabled on
gen11 with GuC then? (because patches for gen11 preempt-to-idle are not
upstreamed)?
Post by Ceraolo Spurio, Daniele
Post by Lis, Tomasz
Post by Daniele Ceraolo Spurio
Post by Tomasz Lis
  }
    int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c
b/drivers/gpu/drm/i915/i915_pci.c
index 4364922..66b6700 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -595,7 +595,8 @@ static const struct intel_device_info
intel_cannonlake_info = {
      GEN(11), \
      .ddb_size = 2048, \
      .has_csr = 0, \
-    .has_logical_ring_elsq = 1
+    .has_logical_ring_elsq = 1, \
+    .has_hw_preempt_to_idle = 1
    static const struct intel_device_info intel_icelake_11_info = {
      GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h
b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
      func(has_logical_ring_contexts); \
      func(has_logical_ring_elsq); \
      func(has_logical_ring_preemption); \
+    func(has_hw_preempt_to_idle); \
      func(has_overlay); \
      func(has_pooled_eu); \
      func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c
b/drivers/gpu/drm/i915/intel_lrc.c
index 29dcf34..8fe6795 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -154,6 +154,7 @@
  #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
  #define GEN8_CTX_STATUS_COMPLETE    (1 << 4)
  #define GEN8_CTX_STATUS_LITE_RESTORE    (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE    (1 << 29)
    #define GEN8_CTX_STATUS_COMPLETED_MASK \
       (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -526,31 +527,49 @@ static void port_assign(struct execlist_port
*port, struct i915_request *rq)
  static void inject_preempt_context(struct intel_engine_cs *engine)
For gen11+ we don't inject a preempt context anymore, maybe we can
rename this function to something like "inject_preempt()".
My initial approach was to just add a second function. Merging the
changes to inject_preempt_context() was requested by Chris; as I
understand it is to minimize refactoring in other work in progress.
Post by Daniele Ceraolo Spurio
Post by Tomasz Lis
  {
      struct intel_engine_execlists *execlists = &engine->execlists;
-    struct intel_context *ce =
- to_intel_context(engine->i915->preempt_context, engine);
-    unsigned int n;
  -    GEM_BUG_ON(execlists->preempt_complete_status !=
-           upper_32_bits(ce->lrc_desc));
-    GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-                       CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-                      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
+    if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
+        /*
+         * If we have hardware preempt-to-idle, we do not need to
+         * inject any job to the hardware. We only set a flag.
+         */
+        GEM_TRACE("%s\n", engine->name);
This trace is in both conditional branches, might be cleaner to just
put it before the if statement.
True, I did not differentiate the messages. Will put it before.
Post by Daniele Ceraolo Spurio
Post by Tomasz Lis
  -    /*
-     * Switch to our empty preempt context so
-     * the state of the GPU is known (idle).
-     */
-    GEM_TRACE("%s\n", engine->name);
-    for (n = execlists_num_ports(execlists); --n; )
-        write_desc(execlists, 0, n);
+        /*
+         * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+         * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+         */
+        GEM_BUG_ON(execlists->ctrl_reg == NULL);
  -    write_desc(execlists, ce->lrc_desc, n);
+        /* trigger preemption to idle */
+        writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+    } else {
+        struct intel_context *ce =
+ to_intel_context(engine->i915->preempt_context, engine);
+        unsigned int n;
  -    /* we need to manually load the submit queue */
-    if (execlists->ctrl_reg)
-        writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+        GEM_BUG_ON(execlists->preempt_complete_status !=
+               upper_32_bits(ce->lrc_desc));
+ GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+                      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+                      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
+
+        /*
+         * Switch to our empty preempt context so
+         * the state of the GPU is known (idle).
+         */
+        GEM_TRACE("%s\n", engine->name);
+        for (n = execlists_num_ports(execlists); --n; )
+            write_desc(execlists, 0, n);
+
+        write_desc(execlists, ce->lrc_desc, n);
+
+        /* we need to manually load the submit queue */
+        if (execlists->ctrl_reg)
+            writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+    }
        execlists_clear_active(&engine->execlists,
EXECLISTS_ACTIVE_HWACK);
      execlists_set_active(&engine->execlists,
EXECLISTS_ACTIVE_PREEMPT);
@@ -1045,22 +1064,51 @@ static void
execlists_submission_tasklet(unsigned long data)
                    status, buf[2*head + 1],
                    execlists->active);
  -            if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-                      GEN8_CTX_STATUS_PREEMPTED))
-                execlists_set_active(execlists,
-                             EXECLISTS_ACTIVE_HWACK);
-            if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+            /*
+             * Check if preempted from idle to idle directly.
+             * The STATUS_IDLE_ACTIVE flag is used to mark
+             * such transition.
+             */
+            if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+                 (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
+
                  execlists_clear_active(execlists,
                                 EXECLISTS_ACTIVE_HWACK);
EXECLISTS_ACTIVE_HWACK should be already clear here (we clear it
both when we inject the pre-emption and on the previous A->I CSB
event), so there should be no need to clear it.
This is a complex case; optimizations here may lead to errors later.
But I agree - since this block is only entered on idle-to-idle
preemption, and setting the flag can only happen when hardware is not
idle, we should never see the ACTIVE_HWACK flag set here.
I will change it to GEM_BUG_ON(), unless I will get any errors in
testing that.
I'm not sure we actually need to care at all about
EXECLISTS_ACTIVE_HWACK here. From what I can see that is only used to
make sure we don't submit while the execlists HW is loading the
current submission. In this case however we're sure no submissions are
occurring because EXECLISTS_ACTIVE_PREEMPT is set, so we're already
guarded.
Daniele
Post by Lis, Tomasz
Post by Daniele Ceraolo Spurio
Post by Tomasz Lis
  -            if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
-                continue;
+                /*
+                 * We could not have COMPLETED anything
+                 * if we were idle before preemption.
+                 */
+                GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
+            }
+
+            else {
nitpick: formatting is wrong here.
ack.
Post by Daniele Ceraolo Spurio
Daniele
Post by Tomasz Lis
+                if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+                          GEN8_CTX_STATUS_PREEMPTED))
+                    execlists_set_active(execlists,
+                               EXECLISTS_ACTIVE_HWACK);
+
+                if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+                    execlists_clear_active(execlists,
+                               EXECLISTS_ACTIVE_HWACK);
  -            /* We should never get a COMPLETED | IDLE_ACTIVE! */
-            GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+                if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+                    continue;
  -            if (status & GEN8_CTX_STATUS_COMPLETE &&
-                buf[2*head + 1] ==
execlists->preempt_complete_status) {
+                /*
+                 * We should never get a
+                 * COMPLETED | IDLE_ACTIVE!
+                 */
+                GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+            }
+
+            /*
+             * Check if preempted to real idle, either directly or
+             * the preemptive context already finished executing
+             */
+            if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+                (status & GEN8_CTX_STATUS_COMPLETE &&
+                buf[2*head + 1] ==
execlists->preempt_complete_status)) {
                  GEM_TRACE("%s preempt-idle\n", engine->name);
execlists_cancel_port_requests(execlists);
@@ -2217,7 +2265,8 @@ static void
execlists_set_default_submission(struct intel_engine_cs *engine)
      engine->unpark = NULL;
        engine->flags |= I915_ENGINE_SUPPORTS_STATS;
-    if (engine->i915->preempt_context)
+    if (engine->i915->preempt_context ||
+        HAS_HW_PREEMPT_TO_IDLE(engine->i915))
          engine->flags |= I915_ENGINE_HAS_PREEMPTION;
        engine->i915->caps.scheduler =
diff --git a/drivers/gpu/drm/i915/intel_lrc.h
b/drivers/gpu/drm/i915/intel_lrc.h
index 4ec7d8d..b1083ac 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -45,6 +45,7 @@
  #define RING_EXECLIST_SQ_CONTENTS(engine)
_MMIO((engine)->mmio_base + 0x510)
  #define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base +
0x550)
  #define      EL_CTRL_LOAD                (1 << 0)
+#define      EL_CTRL_PREEMPT_TO_IDLE        (1 << 1)
    /* The docs specify that the write pointer wraps around after
5h, "After status
   * is written out to the last available status QW at offset 5h,
this pointer
Patchwork
2018-05-11 16:33:28 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev3)
URL : https://patchwork.freedesktop.org/series/40747/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4169 -> Patchwork_8983 =

== Summary - SUCCESS ==

No regressions found.

External URL: https://patchwork.freedesktop.org/api/1.0/series/40747/revisions/3/mbox/


== Changes ==

No changes found


== Participating hosts (41 -> 37) ==

Missing (4): fi-ctg-p8600 fi-ilk-m540 fi-byt-squawks fi-skl-6700hq


== Build changes ==

* Linux: CI_DRM_4169 -> Patchwork_8983

CI_DRM_4169: 05bfe2ceaa9df8f56313507ae01344971fa4f8f4 @ git://anongit.freedesktop.org/gfx-ci/linux
IGT_4475: 35f08c12aa216d5b62a5b9984b575cee6905098f @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
Patchwork_8983: 3492dcf9f1e429e4bd7fe2b95c5f5a912f5a4ade @ git://anongit.freedesktop.org/gfx-ci/linux
piglit_4475: 3ba0657bff4216d1ec7179935590261855f1651e @ git://anongit.freedesktop.org/piglit


== Linux commits ==

3492dcf9f1e4 drm/i915/gen11: Preempt-to-idle support in execlists.

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8983/issues.html
Tomasz Lis
2018-05-25 18:26:38 UTC
Permalink
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.

Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

v2: Added needs_preempt_context() change so that it is not created when
preempt-to-idle is supported. (Chris)
Updated setting HWACK flag so that it is cleared after
preempt-to-idle. (Chris, Daniele)
Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)

v3: Fixed needs_preempt_context() change. (Chris)
Merged preemption trigger functions to one. (Chris)
Fixed context state to not assume COMPLETED_MASK after preemption,
since idle-to-idle case will not have it set.

v4: Simplified needs_preempt_context() change. (Daniele)
Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)

Cc: Joonas Lahtinen <***@linux.intel.com>
Cc: Chris Wilson <***@chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <***@intel.com>
Cc: Michal Winiarski <***@intel.com>
Cc: Mika Kuoppala <***@intel.com>
Bspec: 18922
Signed-off-by: Tomasz Lis <***@intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 2 +
drivers/gpu/drm/i915/i915_gem_context.c | 3 +-
drivers/gpu/drm/i915/i915_pci.c | 3 +-
drivers/gpu/drm/i915/intel_device_info.h | 1 +
drivers/gpu/drm/i915/intel_lrc.c | 113 +++++++++++++++++++++----------
drivers/gpu/drm/i915/intel_lrc.h | 1 +
6 files changed, 86 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 487922f..35eddf7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2534,6 +2534,8 @@ intel_info(const struct drm_i915_private *dev_priv)
((dev_priv)->info.has_logical_ring_elsq)
#define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+ ((dev_priv)->info.has_hw_preempt_to_idle)

#define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 45393f6..341a5ff 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -455,7 +455,8 @@ destroy_kernel_context(struct i915_gem_context **ctxp)

static bool needs_preempt_context(struct drm_i915_private *i915)
{
- return HAS_LOGICAL_RING_PREEMPTION(i915);
+ return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+ !HAS_HW_PREEMPT_TO_IDLE(i915);
}

int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 97a91e6a..ee09926 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -593,7 +593,8 @@ static const struct intel_device_info intel_cannonlake_info = {
GEN(11), \
.ddb_size = 2048, \
.has_csr = 0, \
- .has_logical_ring_elsq = 1
+ .has_logical_ring_elsq = 1, \
+ .has_hw_preempt_to_idle = 1

static const struct intel_device_info intel_icelake_11_info = {
GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
func(has_logical_ring_contexts); \
func(has_logical_ring_elsq); \
func(has_logical_ring_preemption); \
+ func(has_hw_preempt_to_idle); \
func(has_overlay); \
func(has_pooled_eu); \
func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8a6058b..f95cb37 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -154,6 +154,7 @@
#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
#define GEN8_CTX_STATUS_COMPLETE (1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE (1 << 29)

#define GEN8_CTX_STATUS_COMPLETED_MASK \
(GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -522,31 +523,46 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq)
static void inject_preempt_context(struct intel_engine_cs *engine)
{
struct intel_engine_execlists *execlists = &engine->execlists;
- struct intel_context *ce =
- to_intel_context(engine->i915->preempt_context, engine);
- unsigned int n;
-
- GEM_BUG_ON(execlists->preempt_complete_status !=
- upper_32_bits(ce->lrc_desc));
- GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
- CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
- CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));

- /*
- * Switch to our empty preempt context so
- * the state of the GPU is known (idle).
- */
GEM_TRACE("%s\n", engine->name);
- for (n = execlists_num_ports(execlists); --n; )
- write_desc(execlists, 0, n);
+ if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
+ /*
+ * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+ * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+ */
+ GEM_BUG_ON(execlists->ctrl_reg == NULL);

- write_desc(execlists, ce->lrc_desc, n);
+ /*
+ * If we have hardware preempt-to-idle, we do not need to
+ * inject any job to the hardware. We only set a flag.
+ */
+ writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+ } else {
+ struct intel_context *ce =
+ to_intel_context(engine->i915->preempt_context, engine);
+ unsigned int n;

- /* we need to manually load the submit queue */
- if (execlists->ctrl_reg)
- writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+ GEM_BUG_ON(execlists->preempt_complete_status !=
+ upper_32_bits(ce->lrc_desc));
+ GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+ CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+ CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
+
+ /*
+ * Switch to our empty preempt context so
+ * the state of the GPU is known (idle).
+ */
+ for (n = execlists_num_ports(execlists); --n; )
+ write_desc(execlists, 0, n);
+
+ write_desc(execlists, ce->lrc_desc, n);
+
+ /* we need to manually load the submit queue */
+ if (execlists->ctrl_reg)
+ writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+ }

execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
@@ -1031,22 +1047,48 @@ static void process_csb(struct intel_engine_cs *engine)
status, buf[2*head + 1],
execlists->active);

- if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
- GEN8_CTX_STATUS_PREEMPTED))
- execlists_set_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
- if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
- execlists_clear_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
+ /*
+ * Check if preempted from idle to idle directly.
+ * The STATUS_IDLE_ACTIVE flag is used to mark
+ * such transition.
+ */
+ if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+ (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
+
+ /* Cannot be waiting for HWACK while HW is idle */
+ GEM_BUG_ON(execlists_is_active(execlists,
+ EXECLISTS_ACTIVE_HWACK));

- if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
- continue;
+ /*
+ * We could not have COMPLETED anything
+ * if we were idle before preemption.
+ */
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
+ } else {
+ if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+ GEN8_CTX_STATUS_PREEMPTED))
+ execlists_set_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);
+
+ if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+ execlists_clear_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);
+
+ if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+ continue;
+
+ /* We should never get a COMPLETED | IDLE_ACTIVE! */
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+ }

- /* We should never get a COMPLETED | IDLE_ACTIVE! */
- GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);

- if (status & GEN8_CTX_STATUS_COMPLETE &&
- buf[2*head + 1] == execlists->preempt_complete_status) {
+ /*
+ * Check if preempted to real idle, either directly or
+ * the preemptive context already finished executing
+ */
+ if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+ (status & GEN8_CTX_STATUS_COMPLETE &&
+ buf[2*head + 1] == execlists->preempt_complete_status)) {
GEM_TRACE("%s preempt-idle\n", engine->name);
complete_preempt_context(execlists);
continue;
@@ -2337,7 +2379,8 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
engine->unpark = NULL;

engine->flags |= I915_ENGINE_SUPPORTS_STATS;
- if (engine->i915->preempt_context)
+ if (engine->i915->preempt_context ||
+ HAS_HW_PREEMPT_TO_IDLE(engine->i915))
engine->flags |= I915_ENGINE_HAS_PREEMPTION;

engine->i915->caps.scheduler =
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 1593194..3249e9b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -45,6 +45,7 @@
#define RING_EXECLIST_SQ_CONTENTS(engine) _MMIO((engine)->mmio_base + 0x510)
#define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base + 0x550)
#define EL_CTRL_LOAD (1 << 0)
+#define EL_CTRL_PREEMPT_TO_IDLE (1 << 1)

/* The docs specify that the write pointer wraps around after 5h, "After status
* is written out to the last available status QW at offset 5h, this pointer
--
2.7.4
Daniele Ceraolo Spurio
2018-06-11 16:37:46 UTC
Permalink
Post by Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.
Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.
This patch does not cover using the new preemption mechanism when GuC is
active.
v2: Added needs_preempt_context() change so that it is not created when
preempt-to-idle is supported. (Chris)
Updated setting HWACK flag so that it is cleared after
preempt-to-idle. (Chris, Daniele)
Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
v3: Fixed needs_preempt_context() change. (Chris)
Merged preemption trigger functions to one. (Chris)
Fixed context state to not assume COMPLETED_MASK after preemption,
since idle-to-idle case will not have it set.
v4: Simplified needs_preempt_context() change. (Daniele)
Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)
Bspec: 18922
---
drivers/gpu/drm/i915/i915_drv.h | 2 +
drivers/gpu/drm/i915/i915_gem_context.c | 3 +-
drivers/gpu/drm/i915/i915_pci.c | 3 +-
drivers/gpu/drm/i915/intel_device_info.h | 1 +
drivers/gpu/drm/i915/intel_lrc.c | 113 +++++++++++++++++++++----------
drivers/gpu/drm/i915/intel_lrc.h | 1 +
6 files changed, 86 insertions(+), 37 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 487922f..35eddf7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2534,6 +2534,8 @@ intel_info(const struct drm_i915_private *dev_priv)
((dev_priv)->info.has_logical_ring_elsq)
#define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+ ((dev_priv)->info.has_hw_preempt_to_idle)
#define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 45393f6..341a5ff 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -455,7 +455,8 @@ destroy_kernel_context(struct i915_gem_context **ctxp)
static bool needs_preempt_context(struct drm_i915_private *i915)
{
- return HAS_LOGICAL_RING_PREEMPTION(i915);
+ return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+ !HAS_HW_PREEMPT_TO_IDLE(i915);
}
int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 97a91e6a..ee09926 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -593,7 +593,8 @@ static const struct intel_device_info intel_cannonlake_info = {
GEN(11), \
.ddb_size = 2048, \
.has_csr = 0, \
- .has_logical_ring_elsq = 1
+ .has_logical_ring_elsq = 1, \
+ .has_hw_preempt_to_idle = 1
static const struct intel_device_info intel_icelake_11_info = {
GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
func(has_logical_ring_contexts); \
func(has_logical_ring_elsq); \
func(has_logical_ring_preemption); \
+ func(has_hw_preempt_to_idle); \
func(has_overlay); \
func(has_pooled_eu); \
func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8a6058b..f95cb37 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -154,6 +154,7 @@
#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
#define GEN8_CTX_STATUS_COMPLETE (1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE (1 << 29)
#define GEN8_CTX_STATUS_COMPLETED_MASK \
(GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -522,31 +523,46 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq)
static void inject_preempt_context(struct intel_engine_cs *engine)
continuing the discussion from the previous patch, I still think that we
should rename this function now that it doesn't inject a context on some
gens. A new function name should be relatively trivial to handle from
other patch series hitting the area (compared to having a second function).
Post by Tomasz Lis
{
struct intel_engine_execlists *execlists = &engine->execlists;
- struct intel_context *ce =
- to_intel_context(engine->i915->preempt_context, engine);
- unsigned int n;
-
- GEM_BUG_ON(execlists->preempt_complete_status !=
- upper_32_bits(ce->lrc_desc));
- GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
- CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
- CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
- /*
- * Switch to our empty preempt context so
- * the state of the GPU is known (idle).
- */
GEM_TRACE("%s\n", engine->name);
- for (n = execlists_num_ports(execlists); --n; )
- write_desc(execlists, 0, n);
+ if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
+ /*
+ * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+ * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+ */
+ GEM_BUG_ON(execlists->ctrl_reg == NULL);
- write_desc(execlists, ce->lrc_desc, n);
+ /*
+ * If we have hardware preempt-to-idle, we do not need to
+ * inject any job to the hardware. We only set a flag.
+ */
+ writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+ } else {
+ struct intel_context *ce =
+ to_intel_context(engine->i915->preempt_context, engine);
+ unsigned int n;
- /* we need to manually load the submit queue */
- if (execlists->ctrl_reg)
- writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+ GEM_BUG_ON(execlists->preempt_complete_status !=
+ upper_32_bits(ce->lrc_desc));
+ GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+ CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+ CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
+
+ /*
+ * Switch to our empty preempt context so
+ * the state of the GPU is known (idle).
+ */
+ for (n = execlists_num_ports(execlists); --n; )
+ write_desc(execlists, 0, n);
+
+ write_desc(execlists, ce->lrc_desc, n);
+
+ /* we need to manually load the submit queue */
+ if (execlists->ctrl_reg)
+ writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+ }
execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
@@ -1031,22 +1047,48 @@ static void process_csb(struct intel_engine_cs *engine)
status, buf[2*head + 1],
execlists->active);
- if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
- GEN8_CTX_STATUS_PREEMPTED))
- execlists_set_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
- if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
- execlists_clear_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
+ /*
+ * Check if preempted from idle to idle directly.
+ * The STATUS_IDLE_ACTIVE flag is used to mark
+ * such transition.
+ */
+ if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+ (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
+
+ /* Cannot be waiting for HWACK while HW is idle */
This comment does not match the check, since if the
EXECLISTS_ACTIVE_HWACK is set it means we've received the hw ack, not
that we're waiting for it. Personally I would just remove the BUG_ON
since we don't really care about the value of HWACK as long as
EXECLISTS_ACTIVE_PREEMPT is set, as the latter ensures us we're not
going to submit work until the whole preempt process is complete. A
BUG_ON for EXECLISTS_ACTIVE_PREEMPT is already in
complete_preempt_context so we're covered on that side.

With the 2 minor comments addressed:

Reviewed-by: Daniele Ceraolo Spurio <***@intel.com>

Daniele
Post by Tomasz Lis
+ GEM_BUG_ON(execlists_is_active(execlists,
+ EXECLISTS_ACTIVE_HWACK));
- if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
- continue;
+ /*
+ * We could not have COMPLETED anything
+ * if we were idle before preemption.
+ */
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
+ } else {
+ if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+ GEN8_CTX_STATUS_PREEMPTED))
+ execlists_set_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);
+
+ if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+ execlists_clear_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);
+
+ if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+ continue;
+
+ /* We should never get a COMPLETED | IDLE_ACTIVE! */
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+ }
- /* We should never get a COMPLETED | IDLE_ACTIVE! */
- GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
- if (status & GEN8_CTX_STATUS_COMPLETE &&
- buf[2*head + 1] == execlists->preempt_complete_status) {
+ /*
+ * Check if preempted to real idle, either directly or
+ * the preemptive context already finished executing
+ */
+ if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+ (status & GEN8_CTX_STATUS_COMPLETE &&
+ buf[2*head + 1] == execlists->preempt_complete_status)) {
GEM_TRACE("%s preempt-idle\n", engine->name);
complete_preempt_context(execlists);
continue;
@@ -2337,7 +2379,8 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
engine->unpark = NULL;
engine->flags |= I915_ENGINE_SUPPORTS_STATS;
- if (engine->i915->preempt_context)
+ if (engine->i915->preempt_context ||
+ HAS_HW_PREEMPT_TO_IDLE(engine->i915))
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
engine->i915->caps.scheduler =
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 1593194..3249e9b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -45,6 +45,7 @@
#define RING_EXECLIST_SQ_CONTENTS(engine) _MMIO((engine)->mmio_base + 0x510)
#define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base + 0x550)
#define EL_CTRL_LOAD (1 << 0)
+#define EL_CTRL_PREEMPT_TO_IDLE (1 << 1)
/* The docs specify that the write pointer wraps around after 5h, "After status
* is written out to the last available status QW at offset 5h, this pointer
Lis, Tomasz
2018-06-29 16:50:56 UTC
Permalink
Post by Daniele Ceraolo Spurio
Post by Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.
Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.
This patch does not cover using the new preemption mechanism when GuC is
active.
v2: Added needs_preempt_context() change so that it is not created when
     preempt-to-idle is supported. (Chris)
     Updated setting HWACK flag so that it is cleared after
     preempt-to-idle. (Chris, Daniele)
     Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
v3: Fixed needs_preempt_context() change. (Chris)
     Merged preemption trigger functions to one. (Chris)
     Fixed context state to not assume COMPLETED_MASK after preemption,
     since idle-to-idle case will not have it set.
v4: Simplified needs_preempt_context() change. (Daniele)
     Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)
Bspec: 18922
---
  drivers/gpu/drm/i915/i915_drv.h          |   2 +
  drivers/gpu/drm/i915/i915_gem_context.c  |   3 +-
  drivers/gpu/drm/i915/i915_pci.c          |   3 +-
  drivers/gpu/drm/i915/intel_device_info.h |   1 +
  drivers/gpu/drm/i915/intel_lrc.c         | 113
+++++++++++++++++++++----------
  drivers/gpu/drm/i915/intel_lrc.h         |   1 +
  6 files changed, 86 insertions(+), 37 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h
b/drivers/gpu/drm/i915/i915_drv.h
index 487922f..35eddf7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2534,6 +2534,8 @@ intel_info(const struct drm_i915_private *dev_priv)
          ((dev_priv)->info.has_logical_ring_elsq)
  #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
          ((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+        ((dev_priv)->info.has_hw_preempt_to_idle)
    #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
  diff --git a/drivers/gpu/drm/i915/i915_gem_context.c
b/drivers/gpu/drm/i915/i915_gem_context.c
index 45393f6..341a5ff 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -455,7 +455,8 @@ destroy_kernel_context(struct i915_gem_context **ctxp)
    static bool needs_preempt_context(struct drm_i915_private *i915)
  {
-    return HAS_LOGICAL_RING_PREEMPTION(i915);
+    return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+           !HAS_HW_PREEMPT_TO_IDLE(i915);
  }
    int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c
b/drivers/gpu/drm/i915/i915_pci.c
index 97a91e6a..ee09926 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -593,7 +593,8 @@ static const struct intel_device_info
intel_cannonlake_info = {
      GEN(11), \
      .ddb_size = 2048, \
      .has_csr = 0, \
-    .has_logical_ring_elsq = 1
+    .has_logical_ring_elsq = 1, \
+    .has_hw_preempt_to_idle = 1
    static const struct intel_device_info intel_icelake_11_info = {
      GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h
b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
      func(has_logical_ring_contexts); \
      func(has_logical_ring_elsq); \
      func(has_logical_ring_preemption); \
+    func(has_hw_preempt_to_idle); \
      func(has_overlay); \
      func(has_pooled_eu); \
      func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c
b/drivers/gpu/drm/i915/intel_lrc.c
index 8a6058b..f95cb37 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -154,6 +154,7 @@
  #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
  #define GEN8_CTX_STATUS_COMPLETE    (1 << 4)
  #define GEN8_CTX_STATUS_LITE_RESTORE    (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE    (1 << 29)
    #define GEN8_CTX_STATUS_COMPLETED_MASK \
       (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -522,31 +523,46 @@ static void port_assign(struct execlist_port
*port, struct i915_request *rq)
  static void inject_preempt_context(struct intel_engine_cs *engine)
continuing the discussion from the previous patch, I still think that
we should rename this function now that it doesn't inject a context on
some gens. A new function name should be relatively trivial to handle
from other patch series hitting the area (compared to having a second
function).
Ok, will rename it then.
What would be the most adequate name? execlist_send_preempt_to_idle()?
Post by Daniele Ceraolo Spurio
Post by Tomasz Lis
  {
      struct intel_engine_execlists *execlists = &engine->execlists;
-    struct intel_context *ce =
-        to_intel_context(engine->i915->preempt_context, engine);
-    unsigned int n;
-
-    GEM_BUG_ON(execlists->preempt_complete_status !=
-           upper_32_bits(ce->lrc_desc));
-    GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-                       CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-                      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
  -    /*
-     * Switch to our empty preempt context so
-     * the state of the GPU is known (idle).
-     */
      GEM_TRACE("%s\n", engine->name);
-    for (n = execlists_num_ports(execlists); --n; )
-        write_desc(execlists, 0, n);
+    if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
+        /*
+         * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+         * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+         */
+        GEM_BUG_ON(execlists->ctrl_reg == NULL);
  -    write_desc(execlists, ce->lrc_desc, n);
+        /*
+         * If we have hardware preempt-to-idle, we do not need to
+         * inject any job to the hardware. We only set a flag.
+         */
+        writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+    } else {
+        struct intel_context *ce =
+ to_intel_context(engine->i915->preempt_context, engine);
+        unsigned int n;
  -    /* we need to manually load the submit queue */
-    if (execlists->ctrl_reg)
-        writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+        GEM_BUG_ON(execlists->preempt_complete_status !=
+               upper_32_bits(ce->lrc_desc));
+        GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+                           CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+                          CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
+
+        /*
+         * Switch to our empty preempt context so
+         * the state of the GPU is known (idle).
+         */
+        for (n = execlists_num_ports(execlists); --n; )
+            write_desc(execlists, 0, n);
+
+        write_desc(execlists, ce->lrc_desc, n);
+
+        /* we need to manually load the submit queue */
+        if (execlists->ctrl_reg)
+            writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+    }
        execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
      execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
@@ -1031,22 +1047,48 @@ static void process_csb(struct
intel_engine_cs *engine)
                    status, buf[2*head + 1],
                    execlists->active);
  -            if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-                      GEN8_CTX_STATUS_PREEMPTED))
-                execlists_set_active(execlists,
-                             EXECLISTS_ACTIVE_HWACK);
-            if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
-                execlists_clear_active(execlists,
-                               EXECLISTS_ACTIVE_HWACK);
+            /*
+             * Check if preempted from idle to idle directly.
+             * The STATUS_IDLE_ACTIVE flag is used to mark
+             * such transition.
+             */
+            if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+                 (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
+
+                /* Cannot be waiting for HWACK while HW is idle */
This comment does not match the check, since if the
EXECLISTS_ACTIVE_HWACK is set it means we've received the hw ack, not
that we're waiting for it. Personally I would just remove the BUG_ON
since we don't really care about the value of HWACK as long as
EXECLISTS_ACTIVE_PREEMPT is set, as the latter ensures us we're not
going to submit work until the whole preempt process is complete. A
BUG_ON for EXECLISTS_ACTIVE_PREEMPT is already in
complete_preempt_context so we're covered on that side.
Will remove.
Post by Daniele Ceraolo Spurio
Daniele
Post by Tomasz Lis
+ GEM_BUG_ON(execlists_is_active(execlists,
+                              EXECLISTS_ACTIVE_HWACK));
  -            if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
-                continue;
+                /*
+                 * We could not have COMPLETED anything
+                 * if we were idle before preemption.
+                 */
+                GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
+            } else {
+                if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+                          GEN8_CTX_STATUS_PREEMPTED))
+                    execlists_set_active(execlists,
+                                 EXECLISTS_ACTIVE_HWACK);
+
+                if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+                    execlists_clear_active(execlists,
+                                   EXECLISTS_ACTIVE_HWACK);
+
+                if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+                    continue;
+
+                /* We should never get a COMPLETED | IDLE_ACTIVE! */
+                GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+            }
  -            /* We should never get a COMPLETED | IDLE_ACTIVE! */
-            GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
  -            if (status & GEN8_CTX_STATUS_COMPLETE &&
-                buf[2*head + 1] ==
execlists->preempt_complete_status) {
+            /*
+             * Check if preempted to real idle, either directly or
+             * the preemptive context already finished executing
+             */
+            if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+                (status & GEN8_CTX_STATUS_COMPLETE &&
+                buf[2*head + 1] ==
execlists->preempt_complete_status)) {
                  GEM_TRACE("%s preempt-idle\n", engine->name);
                  complete_preempt_context(execlists);
                  continue;
@@ -2337,7 +2379,8 @@ static void
execlists_set_default_submission(struct intel_engine_cs *engine)
      engine->unpark = NULL;
        engine->flags |= I915_ENGINE_SUPPORTS_STATS;
-    if (engine->i915->preempt_context)
+    if (engine->i915->preempt_context ||
+        HAS_HW_PREEMPT_TO_IDLE(engine->i915))
          engine->flags |= I915_ENGINE_HAS_PREEMPTION;
        engine->i915->caps.scheduler =
diff --git a/drivers/gpu/drm/i915/intel_lrc.h
b/drivers/gpu/drm/i915/intel_lrc.h
index 1593194..3249e9b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -45,6 +45,7 @@
  #define RING_EXECLIST_SQ_CONTENTS(engine) _MMIO((engine)->mmio_base
+ 0x510)
  #define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base +
0x550)
  #define      EL_CTRL_LOAD                (1 << 0)
+#define      EL_CTRL_PREEMPT_TO_IDLE        (1 << 1)
    /* The docs specify that the write pointer wraps around after 5h,
"After status
   * is written out to the last available status QW at offset 5h,
this pointer
Daniele Ceraolo Spurio
2018-07-02 17:36:57 UTC
Permalink
Post by Lis, Tomasz
Post by Daniele Ceraolo Spurio
Post by Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.
Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.
This patch does not cover using the new preemption mechanism when GuC is
active.
v2: Added needs_preempt_context() change so that it is not created when
     preempt-to-idle is supported. (Chris)
     Updated setting HWACK flag so that it is cleared after
     preempt-to-idle. (Chris, Daniele)
     Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
v3: Fixed needs_preempt_context() change. (Chris)
     Merged preemption trigger functions to one. (Chris)
     Fixed context state to not assume COMPLETED_MASK after preemption,
     since idle-to-idle case will not have it set.
v4: Simplified needs_preempt_context() change. (Daniele)
     Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)
Bspec: 18922
---
  drivers/gpu/drm/i915/i915_drv.h          |   2 +
  drivers/gpu/drm/i915/i915_gem_context.c  |   3 +-
  drivers/gpu/drm/i915/i915_pci.c          |   3 +-
  drivers/gpu/drm/i915/intel_device_info.h |   1 +
  drivers/gpu/drm/i915/intel_lrc.c         | 113
+++++++++++++++++++++----------
  drivers/gpu/drm/i915/intel_lrc.h         |   1 +
  6 files changed, 86 insertions(+), 37 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h
b/drivers/gpu/drm/i915/i915_drv.h
index 487922f..35eddf7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2534,6 +2534,8 @@ intel_info(const struct drm_i915_private *dev_priv)
          ((dev_priv)->info.has_logical_ring_elsq)
  #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
          ((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+        ((dev_priv)->info.has_hw_preempt_to_idle)
    #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
  diff --git a/drivers/gpu/drm/i915/i915_gem_context.c
b/drivers/gpu/drm/i915/i915_gem_context.c
index 45393f6..341a5ff 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -455,7 +455,8 @@ destroy_kernel_context(struct i915_gem_context **ctxp)
    static bool needs_preempt_context(struct drm_i915_private *i915)
  {
-    return HAS_LOGICAL_RING_PREEMPTION(i915);
+    return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+           !HAS_HW_PREEMPT_TO_IDLE(i915);
  }
    int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c
b/drivers/gpu/drm/i915/i915_pci.c
index 97a91e6a..ee09926 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -593,7 +593,8 @@ static const struct intel_device_info
intel_cannonlake_info = {
      GEN(11), \
      .ddb_size = 2048, \
      .has_csr = 0, \
-    .has_logical_ring_elsq = 1
+    .has_logical_ring_elsq = 1, \
+    .has_hw_preempt_to_idle = 1
    static const struct intel_device_info intel_icelake_11_info = {
      GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h
b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
      func(has_logical_ring_contexts); \
      func(has_logical_ring_elsq); \
      func(has_logical_ring_preemption); \
+    func(has_hw_preempt_to_idle); \
      func(has_overlay); \
      func(has_pooled_eu); \
      func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c
b/drivers/gpu/drm/i915/intel_lrc.c
index 8a6058b..f95cb37 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -154,6 +154,7 @@
  #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
  #define GEN8_CTX_STATUS_COMPLETE    (1 << 4)
  #define GEN8_CTX_STATUS_LITE_RESTORE    (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE    (1 << 29)
    #define GEN8_CTX_STATUS_COMPLETED_MASK \
       (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -522,31 +523,46 @@ static void port_assign(struct execlist_port
*port, struct i915_request *rq)
  static void inject_preempt_context(struct intel_engine_cs *engine)
continuing the discussion from the previous patch, I still think that
we should rename this function now that it doesn't inject a context on
some gens. A new function name should be relatively trivial to handle
from other patch series hitting the area (compared to having a second
function).
Ok, will rename it then.
What would be the most adequate name? execlist_send_preempt_to_idle()?
even something simpler like "inject_preemption()" would work IMO. But
I've always been bad with naming, so I'll leave it to your judgment :)

Daniele
Post by Lis, Tomasz
Post by Daniele Ceraolo Spurio
Post by Tomasz Lis
  {
      struct intel_engine_execlists *execlists = &engine->execlists;
-    struct intel_context *ce =
-        to_intel_context(engine->i915->preempt_context, engine);
-    unsigned int n;
-
-    GEM_BUG_ON(execlists->preempt_complete_status !=
-           upper_32_bits(ce->lrc_desc));
-    GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-                       CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-                      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
  -    /*
-     * Switch to our empty preempt context so
-     * the state of the GPU is known (idle).
-     */
      GEM_TRACE("%s\n", engine->name);
-    for (n = execlists_num_ports(execlists); --n; )
-        write_desc(execlists, 0, n);
+    if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
+        /*
+         * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+         * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+         */
+        GEM_BUG_ON(execlists->ctrl_reg == NULL);
  -    write_desc(execlists, ce->lrc_desc, n);
+        /*
+         * If we have hardware preempt-to-idle, we do not need to
+         * inject any job to the hardware. We only set a flag.
+         */
+        writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+    } else {
+        struct intel_context *ce =
+ to_intel_context(engine->i915->preempt_context, engine);
+        unsigned int n;
  -    /* we need to manually load the submit queue */
-    if (execlists->ctrl_reg)
-        writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+        GEM_BUG_ON(execlists->preempt_complete_status !=
+               upper_32_bits(ce->lrc_desc));
+        GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+                           CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+                          CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
+
+        /*
+         * Switch to our empty preempt context so
+         * the state of the GPU is known (idle).
+         */
+        for (n = execlists_num_ports(execlists); --n; )
+            write_desc(execlists, 0, n);
+
+        write_desc(execlists, ce->lrc_desc, n);
+
+        /* we need to manually load the submit queue */
+        if (execlists->ctrl_reg)
+            writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+    }
        execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
      execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
@@ -1031,22 +1047,48 @@ static void process_csb(struct
intel_engine_cs *engine)
                    status, buf[2*head + 1],
                    execlists->active);
  -            if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-                      GEN8_CTX_STATUS_PREEMPTED))
-                execlists_set_active(execlists,
-                             EXECLISTS_ACTIVE_HWACK);
-            if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
-                execlists_clear_active(execlists,
-                               EXECLISTS_ACTIVE_HWACK);
+            /*
+             * Check if preempted from idle to idle directly.
+             * The STATUS_IDLE_ACTIVE flag is used to mark
+             * such transition.
+             */
+            if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+                 (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
+
+                /* Cannot be waiting for HWACK while HW is idle */
This comment does not match the check, since if the
EXECLISTS_ACTIVE_HWACK is set it means we've received the hw ack, not
that we're waiting for it. Personally I would just remove the BUG_ON
since we don't really care about the value of HWACK as long as
EXECLISTS_ACTIVE_PREEMPT is set, as the latter ensures us we're not
going to submit work until the whole preempt process is complete. A
BUG_ON for EXECLISTS_ACTIVE_PREEMPT is already in
complete_preempt_context so we're covered on that side.
Will remove.
Post by Daniele Ceraolo Spurio
Daniele
Post by Tomasz Lis
+ GEM_BUG_ON(execlists_is_active(execlists,
+                              EXECLISTS_ACTIVE_HWACK));
  -            if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
-                continue;
+                /*
+                 * We could not have COMPLETED anything
+                 * if we were idle before preemption.
+                 */
+                GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
+            } else {
+                if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+                          GEN8_CTX_STATUS_PREEMPTED))
+                    execlists_set_active(execlists,
+                                 EXECLISTS_ACTIVE_HWACK);
+
+                if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+                    execlists_clear_active(execlists,
+                                   EXECLISTS_ACTIVE_HWACK);
+
+                if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+                    continue;
+
+                /* We should never get a COMPLETED | IDLE_ACTIVE! */
+                GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+            }
  -            /* We should never get a COMPLETED | IDLE_ACTIVE! */
-            GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
  -            if (status & GEN8_CTX_STATUS_COMPLETE &&
-                buf[2*head + 1] ==
execlists->preempt_complete_status) {
+            /*
+             * Check if preempted to real idle, either directly or
+             * the preemptive context already finished executing
+             */
+            if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+                (status & GEN8_CTX_STATUS_COMPLETE &&
+                buf[2*head + 1] ==
execlists->preempt_complete_status)) {
                  GEM_TRACE("%s preempt-idle\n", engine->name);
                  complete_preempt_context(execlists);
                  continue;
@@ -2337,7 +2379,8 @@ static void
execlists_set_default_submission(struct intel_engine_cs *engine)
      engine->unpark = NULL;
        engine->flags |= I915_ENGINE_SUPPORTS_STATS;
-    if (engine->i915->preempt_context)
+    if (engine->i915->preempt_context ||
+        HAS_HW_PREEMPT_TO_IDLE(engine->i915))
          engine->flags |= I915_ENGINE_HAS_PREEMPTION;
        engine->i915->caps.scheduler =
diff --git a/drivers/gpu/drm/i915/intel_lrc.h
b/drivers/gpu/drm/i915/intel_lrc.h
index 1593194..3249e9b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -45,6 +45,7 @@
  #define RING_EXECLIST_SQ_CONTENTS(engine) _MMIO((engine)->mmio_base
+ 0x510)
  #define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base +
0x550)
  #define      EL_CTRL_LOAD                (1 << 0)
+#define      EL_CTRL_PREEMPT_TO_IDLE        (1 << 1)
    /* The docs specify that the write pointer wraps around after 5h,
"After status
   * is written out to the last available status QW at offset 5h,
this pointer
Patchwork
2018-05-25 18:51:40 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev4)
URL : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
e56337e6a35d drm/i915/gen11: Preempt-to-idle support in execlists.
-:133: CHECK:COMPARISON_TO_NULL: Comparison to NULL could be written "!execlists->ctrl_reg"
#133: FILE: drivers/gpu/drm/i915/intel_lrc.c:533:
+ GEM_BUG_ON(execlists->ctrl_reg == NULL);

-:190: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#190: FILE: drivers/gpu/drm/i915/intel_lrc.c:1056:
+ if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+ (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {

-:191: CHECK:BRACES: Blank lines aren't necessary after an open brace '{'
#191: FILE: drivers/gpu/drm/i915/intel_lrc.c:1057:
+ (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
+

-:194: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#194: FILE: drivers/gpu/drm/i915/intel_lrc.c:1060:
+ GEM_BUG_ON(execlists_is_active(execlists,
+ EXECLISTS_ACTIVE_HWACK));

-:231: CHECK:SPACING: spaces preferred around that '*' (ctx:VxV)
#231: FILE: drivers/gpu/drm/i915/intel_lrc.c:1091:
+ buf[2*head + 1] == execlists->preempt_complete_status)) {
^

total: 0 errors, 0 warnings, 5 checks, 184 lines checked
Patchwork
2018-05-25 18:52:27 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev4)
URL : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Commit: drm/i915/gen11: Preempt-to-idle support in execlists.
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3664:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3666:16: warning: expression using sizeof(void)
Patchwork
2018-05-25 19:08:47 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev4)
URL : https://patchwork.freedesktop.org/series/40747/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4244 -> Patchwork_9126 =

== Summary - WARNING ==

Minor unknown changes coming with Patchwork_9126 need to be verified
manually.

If you think the reported changes have nothing to do with the changes
introduced in Patchwork_9126, please notify your bug team to allow them
to document this new failure mode, which will reduce false positives in CI.

External URL: https://patchwork.freedesktop.org/api/1.0/series/40747/revisions/4/mbox/

== Possible new issues ==

Here are the unknown changes that may have been introduced in Patchwork_9126:

=== IGT changes ===

==== Warnings ====

***@gem_exec_gttfill@basic:
fi-pnv-d510: PASS -> SKIP


== Known issues ==

Here are the changes found in Patchwork_9126 that come from known issues:

=== IGT changes ===

==== Possible fixes ====

***@gem_mmap_gtt@basic-small-bo-tiledx:
fi-gdg-551: FAIL (fdo#102575) -> PASS


fdo#102575 https://bugs.freedesktop.org/show_bug.cgi?id=102575


== Participating hosts (43 -> 39) ==

Missing (4): fi-ctg-p8600 fi-ilk-m540 fi-byt-squawks fi-skl-6700hq


== Build changes ==

* Linux: CI_DRM_4244 -> Patchwork_9126

CI_DRM_4244: 475c2ec7b8c6e01cce9a360b9839dc0dd0fa9629 @ git://anongit.freedesktop.org/gfx-ci/linux
IGT_4499: f560ae5a464331f03f0a669ed46b8c9e56526187 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
Patchwork_9126: e56337e6a35dc880c13010e17245256799793498 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

e56337e6a35d drm/i915/gen11: Preempt-to-idle support in execlists.

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9126/issues.html
Patchwork
2018-05-26 05:18:14 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev4)
URL : https://patchwork.freedesktop.org/series/40747/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4244_full -> Patchwork_9126_full =

== Summary - SUCCESS ==

No regressions found.

External URL: https://patchwork.freedesktop.org/api/1.0/series/40747/revisions/4/mbox/

== Known issues ==

Here are the changes found in Patchwork_9126_full that come from known issues:

=== IGT changes ===

==== Issues hit ====

***@kms_flip@2x-plain-flip-ts-check-interruptible:
shard-glk: PASS -> FAIL (fdo#100368) +1

***@kms_flip_tiling@flip-to-x-tiled:
shard-glk: PASS -> FAIL (fdo#104724)

***@kms_setmode@basic:
shard-apl: PASS -> FAIL (fdo#99912)
shard-kbl: PASS -> FAIL (fdo#99912)


==== Possible fixes ====

***@drv_selftest@live_hangcheck:
shard-apl: DMESG-FAIL (fdo#106560) -> PASS
shard-glk: DMESG-FAIL (fdo#106560) -> PASS

***@kms_cursor_legacy@cursor-vs-flip-toggle:
shard-hsw: FAIL (fdo#103355) -> PASS

***@kms_flip@2x-dpms-vs-vblank-race-interruptible:
shard-hsw: DMESG-FAIL (fdo#103060) -> PASS

***@kms_flip@2x-modeset-vs-vblank-race:
shard-glk: FAIL (fdo#103060) -> PASS

***@kms_flip@2x-plain-flip-ts-check-interruptible:
shard-hsw: FAIL (fdo#100368) -> PASS

***@kms_flip@dpms-vs-vblank-race:
shard-hsw: FAIL (fdo#103060) -> PASS

***@kms_flip_tiling@flip-x-tiled:
shard-glk: FAIL (fdo#104724, fdo#103822) -> PASS

***@kms_vblank@pipe-a-ts-continuation-modeset:
shard-apl: DMESG-WARN (fdo#106247) -> PASS


fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
fdo#103355 https://bugs.freedesktop.org/show_bug.cgi?id=103355
fdo#103822 https://bugs.freedesktop.org/show_bug.cgi?id=103822
fdo#104724 https://bugs.freedesktop.org/show_bug.cgi?id=104724
fdo#106247 https://bugs.freedesktop.org/show_bug.cgi?id=106247
fdo#106560 https://bugs.freedesktop.org/show_bug.cgi?id=106560
fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912


== Participating hosts (5 -> 5) ==

No changes in participating hosts


== Build changes ==

* Linux: CI_DRM_4244 -> Patchwork_9126

CI_DRM_4244: 475c2ec7b8c6e01cce9a360b9839dc0dd0fa9629 @ git://anongit.freedesktop.org/gfx-ci/linux
IGT_4499: f560ae5a464331f03f0a669ed46b8c9e56526187 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
Patchwork_9126: e56337e6a35dc880c13010e17245256799793498 @ git://anongit.freedesktop.org/gfx-ci/linux

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9126/shards.html
Tomasz Lis
2018-07-06 15:52:00 UTC
Permalink
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.

Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

v2: Added needs_preempt_context() change so that it is not created when
preempt-to-idle is supported. (Chris)
Updated setting HWACK flag so that it is cleared after
preempt-to-idle. (Chris, Daniele)
Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)

v3: Fixed needs_preempt_context() change. (Chris)
Merged preemption trigger functions to one. (Chris)
Fixed conyext state tonot assume COMPLETED_MASK after preemption,
since idle-to-idle case will not have it set.

v4: Simplified needs_preempt_context() change. (Daniele)
Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)

v5: Renamed inject_preempt_context(). (Daniele)
Removed duplicated GEM_BUG_ON() on HWACK (Daniele)

Bspec: 18922
Cc: Joonas Lahtinen <***@linux.intel.com>
Cc: Chris Wilson <***@chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <***@intel.com>
Cc: Michal Winiarski <***@intel.com>
Cc: Mika Kuoppala <***@intel.com>
Reviewed-by: Daniele Ceraolo Spurio <***@intel.com>
Signed-off-by: Tomasz Lis <***@intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 2 +
drivers/gpu/drm/i915/i915_gem_context.c | 3 +-
drivers/gpu/drm/i915/i915_pci.c | 3 +-
drivers/gpu/drm/i915/intel_device_info.h | 1 +
drivers/gpu/drm/i915/intel_lrc.c | 114 ++++++++++++++++++++-----------
drivers/gpu/drm/i915/intel_lrc.h | 1 +
6 files changed, 84 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 91a7e4f..c84a66a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2533,6 +2533,8 @@ intel_info(const struct drm_i915_private *dev_priv)
((dev_priv)->info.has_logical_ring_elsq)
#define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+ ((dev_priv)->info.has_hw_preempt_to_idle)

#define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index b10770c..bf7faa7 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -464,7 +464,8 @@ destroy_kernel_context(struct i915_gem_context **ctxp)

static bool needs_preempt_context(struct drm_i915_private *i915)
{
- return HAS_LOGICAL_RING_PREEMPTION(i915);
+ return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+ !HAS_HW_PREEMPT_TO_IDLE(i915);
}

int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 55543f1..2da7e77 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -593,7 +593,8 @@ static const struct intel_device_info intel_cannonlake_info = {
GEN(11), \
.ddb_size = 2048, \
.has_csr = 0, \
- .has_logical_ring_elsq = 1
+ .has_logical_ring_elsq = 1, \
+ .has_hw_preempt_to_idle = 1

static const struct intel_device_info intel_icelake_11_info = {
GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 633f9fb..0be7e03 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
func(has_logical_ring_contexts); \
func(has_logical_ring_elsq); \
func(has_logical_ring_preemption); \
+ func(has_hw_preempt_to_idle); \
func(has_overlay); \
func(has_pooled_eu); \
func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ab89dab..aed4aeb 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -155,6 +155,7 @@
#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
#define GEN8_CTX_STATUS_COMPLETE (1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE (1 << 29)

#define GEN8_CTX_STATUS_COMPLETED_MASK \
(GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -525,34 +526,49 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq)
port_set(port, port_pack(i915_request_get(rq), port_count(port)));
}

-static void inject_preempt_context(struct intel_engine_cs *engine)
+static void execlist_send_preempt_to_idle(struct intel_engine_cs *engine)
{
struct intel_engine_execlists *execlists = &engine->execlists;
- struct intel_context *ce =
- to_intel_context(engine->i915->preempt_context, engine);
- unsigned int n;
-
- GEM_BUG_ON(execlists->preempt_complete_status !=
- upper_32_bits(ce->lrc_desc));
- GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
- CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
- CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));

- /*
- * Switch to our empty preempt context so
- * the state of the GPU is known (idle).
- */
GEM_TRACE("%s\n", engine->name);
- for (n = execlists_num_ports(execlists); --n; )
- write_desc(execlists, 0, n);
+ if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
+ /*
+ * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+ * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+ */
+ GEM_BUG_ON(execlists->ctrl_reg == NULL);
+
+ /*
+ * If we have hardware preempt-to-idle, we do not need to
+ * inject any job to the hardware. We only set a flag.
+ */
+ writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+ } else {
+ struct intel_context *ce =
+ to_intel_context(engine->i915->preempt_context, engine);
+ unsigned int n;

- write_desc(execlists, ce->lrc_desc, n);
+ GEM_BUG_ON(execlists->preempt_complete_status !=
+ upper_32_bits(ce->lrc_desc));
+ GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+ CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+ CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));

- /* we need to manually load the submit queue */
- if (execlists->ctrl_reg)
- writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+ /*
+ * Switch to our empty preempt context so
+ * the state of the GPU is known (idle).
+ */
+ for (n = execlists_num_ports(execlists); --n; )
+ write_desc(execlists, 0, n);
+
+ write_desc(execlists, ce->lrc_desc, n);
+
+ /* we need to manually load the submit queue */
+ if (execlists->ctrl_reg)
+ writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+ }

execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
@@ -627,7 +643,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
return;

if (need_preempt(engine, last, execlists->queue_priority)) {
- inject_preempt_context(engine);
+ execlist_send_preempt_to_idle(engine);
return;
}

@@ -1020,22 +1036,43 @@ static void process_csb(struct intel_engine_cs *engine)
execlists->active);

status = buf[2 * head];
- if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
- GEN8_CTX_STATUS_PREEMPTED))
- execlists_set_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
- if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
- execlists_clear_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
-
- if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
- continue;
+ /*
+ * Check if preempted from idle to idle directly.
+ * The STATUS_IDLE_ACTIVE flag is used to mark
+ * such transition.
+ */
+ if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+ (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
+
+ /*
+ * We could not have COMPLETED anything
+ * if we were idle before preemption.
+ */
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
+ } else {
+ if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+ GEN8_CTX_STATUS_PREEMPTED))
+ execlists_set_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);
+
+ if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+ execlists_clear_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);
+
+ if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+ continue;

- /* We should never get a COMPLETED | IDLE_ACTIVE! */
- GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+ /* We should never get a COMPLETED | IDLE_ACTIVE! */
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+ }

- if (status & GEN8_CTX_STATUS_COMPLETE &&
- buf[2*head + 1] == execlists->preempt_complete_status) {
+ /*
+ * Check if preempted to real idle, either directly or
+ * the preemptive context already finished executing
+ */
+ if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+ (status & GEN8_CTX_STATUS_COMPLETE &&
+ buf[2*head + 1] == execlists->preempt_complete_status)) {
GEM_TRACE("%s preempt-idle\n", engine->name);
complete_preempt_context(execlists);
continue;
@@ -2377,7 +2414,8 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
engine->unpark = NULL;

engine->flags |= I915_ENGINE_SUPPORTS_STATS;
- if (engine->i915->preempt_context)
+ if (engine->i915->preempt_context ||
+ HAS_HW_PREEMPT_TO_IDLE(engine->i915))
engine->flags |= I915_ENGINE_HAS_PREEMPTION;

engine->i915->caps.scheduler =
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 1593194..3249e9b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -45,6 +45,7 @@
#define RING_EXECLIST_SQ_CONTENTS(engine) _MMIO((engine)->mmio_base + 0x510)
#define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base + 0x550)
#define EL_CTRL_LOAD (1 << 0)
+#define EL_CTRL_PREEMPT_TO_IDLE (1 << 1)

/* The docs specify that the write pointer wraps around after 5h, "After status
* is written out to the last available status QW at offset 5h, this pointer
--
2.7.4
Patchwork
2018-07-06 16:08:03 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev5)
URL : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
90f487fed124 drm/i915/gen11: Preempt-to-idle support in execlists.
-:141: CHECK:COMPARISON_TO_NULL: Comparison to NULL could be written "!execlists->ctrl_reg"
#141: FILE: drivers/gpu/drm/i915/intel_lrc.c:539:
+ GEM_BUG_ON(execlists->ctrl_reg == NULL);

-:210: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#210: FILE: drivers/gpu/drm/i915/intel_lrc.c:1045:
+ if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+ (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {

-:211: CHECK:BRACES: Blank lines aren't necessary after an open brace '{'
#211: FILE: drivers/gpu/drm/i915/intel_lrc.c:1046:
+ (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
+

-:244: CHECK:SPACING: spaces preferred around that '*' (ctx:VxV)
#244: FILE: drivers/gpu/drm/i915/intel_lrc.c:1075:
+ buf[2*head + 1] == execlists->preempt_complete_status)) {
^

total: 0 errors, 0 warnings, 4 checks, 192 lines checked
Patchwork
2018-07-06 16:08:59 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev5)
URL : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Commit: drm/i915/gen11: Preempt-to-idle support in execlists.
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3655:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3657:16: warning: expression using sizeof(void)
Patchwork
2018-07-06 16:25:21 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev5)
URL : https://patchwork.freedesktop.org/series/40747/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4446 -> Patchwork_9572 =

== Summary - SUCCESS ==

No regressions found.

External URL: https://patchwork.freedesktop.org/api/1.0/series/40747/revisions/5/mbox/

== Known issues ==

Here are the changes found in Patchwork_9572 that come from known issues:

=== IGT changes ===

==== Warnings ====

***@gem_exec_suspend@basic-s4-devices:
{fi-kbl-8809g}: DMESG-WARN (fdo#107139) -> INCOMPLETE (fdo#107139)


{name}: This element is suppressed. This means it is ignored when computing
the status of the difference (SUCCESS, WARNING, or FAILURE).

fdo#107139 https://bugs.freedesktop.org/show_bug.cgi?id=107139


== Participating hosts (47 -> 41) ==

Missing (6): fi-ilk-m540 fi-bxt-dsi fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600


== Build changes ==

* Linux: CI_DRM_4446 -> Patchwork_9572

CI_DRM_4446: 95944426a9ffda186843c78f2f925494e1bc53c5 @ git://anongit.freedesktop.org/gfx-ci/linux
IGT_4543: 366eed37c7c71217e1cb1f3be5e26358a41f0001 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
Patchwork_9572: 90f487fed124443c7a8d557fa0e54dcbbcbb046e @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

90f487fed124 drm/i915/gen11: Preempt-to-idle support in execlists.

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9572/issues.html
Patchwork
2018-07-07 14:09:06 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev5)
URL : https://patchwork.freedesktop.org/series/40747/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4446_full -> Patchwork_9572_full =

== Summary - WARNING ==

Minor unknown changes coming with Patchwork_9572_full need to be verified
manually.

If you think the reported changes have nothing to do with the changes
introduced in Patchwork_9572_full, please notify your bug team to allow them
to document this new failure mode, which will reduce false positives in CI.



== Possible new issues ==

Here are the unknown changes that may have been introduced in Patchwork_9572_full:

=== IGT changes ===

==== Warnings ====

***@gem_exec_schedule@deep-blt:
shard-kbl: PASS -> SKIP

***@gem_exec_schedule@deep-vebox:
shard-kbl: SKIP -> PASS


== Known issues ==

Here are the changes found in Patchwork_9572_full that come from known issues:

=== IGT changes ===

==== Issues hit ====

***@drv_suspend@shrink:
shard-hsw: PASS -> INCOMPLETE (fdo#103540, fdo#106886)

***@kms_atomic_transition@1x-modeset-transitions-nonblocking:
shard-glk: PASS -> FAIL (fdo#105703)

***@kms_draw_crc@draw-method-rgb565-mmap-wc-ytiled:
shard-glk: PASS -> FAIL (fdo#103184)

***@kms_flip@2x-flip-vs-blocking-wf-vblank:
shard-glk: PASS -> FAIL (fdo#100368) +1

***@kms_flip@modeset-vs-vblank-race-interruptible:
shard-hsw: PASS -> FAIL (fdo#103060)

***@kms_flip_tiling@flip-x-tiled:
shard-glk: PASS -> FAIL (fdo#103822)

***@kms_rotation_crc@sprite-rotation-270:
shard-apl: PASS -> FAIL (fdo#103925)

***@kms_setmode@basic:
shard-apl: PASS -> FAIL (fdo#99912)


==== Possible fixes ====

***@kms_cursor_legacy@2x-long-nonblocking-modeset-vs-cursor-atomic:
shard-glk: FAIL (fdo#106509) -> PASS

***@kms_flip@2x-plain-flip-fb-recreate:
shard-glk: FAIL (fdo#100368) -> PASS

***@kms_universal_plane@cursor-fb-leak-pipe-c:
shard-apl: FAIL -> PASS


fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
fdo#103184 https://bugs.freedesktop.org/show_bug.cgi?id=103184
fdo#103540 https://bugs.freedesktop.org/show_bug.cgi?id=103540
fdo#103822 https://bugs.freedesktop.org/show_bug.cgi?id=103822
fdo#103925 https://bugs.freedesktop.org/show_bug.cgi?id=103925
fdo#105703 https://bugs.freedesktop.org/show_bug.cgi?id=105703
fdo#106509 https://bugs.freedesktop.org/show_bug.cgi?id=106509
fdo#106886 https://bugs.freedesktop.org/show_bug.cgi?id=106886
fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912


== Participating hosts (5 -> 5) ==

No changes in participating hosts


== Build changes ==

* Linux: CI_DRM_4446 -> Patchwork_9572

CI_DRM_4446: 95944426a9ffda186843c78f2f925494e1bc53c5 @ git://anongit.freedesktop.org/gfx-ci/linux
IGT_4543: 366eed37c7c71217e1cb1f3be5e26358a41f0001 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
Patchwork_9572: 90f487fed124443c7a8d557fa0e54dcbbcbb046e @ git://anongit.freedesktop.org/gfx-ci/linux
piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9572/shards.html
Tomasz Lis
2018-07-16 13:07:16 UTC
Permalink
The patch adds a parameter to control the data port coherency functionality
on a per-context level. When the IOCTL is called, a command to switch data
port coherency state is added to the ordered list. All prior requests are
executed on old coherency settings, and all exec requests after the IOCTL
will use new settings.

Rationale:

The OpenCL driver developers requested functionality to control cache
coherency at data port level. Keeping the coherency at that level is disabled
by default due to its performance costs. OpenCL driver is planning to
enable it for a small subset of submissions, when such functionality is
required. Below are answers to basic questions explaining the background
of the functionality and reasoning for the proposed implementation:

1. Why do we need a coherency enable/disable switch for memory that is shared
between CPU and GEN (GPU)?

Memory coherency between CPU and GEN, while being a great feature that enables
CL_MEM_SVM_FINE_GRAIN_BUFFER OCL capability on Intel GEN architecture, adds
overhead related to tracking (snooping) memory inside different cache units
(L1$, L2$, L3$, LLC$, etc.). At the same time, minority of modern OCL
applications actually use CL_MEM_SVM_FINE_GRAIN_BUFFER (and hence require
memory coherency between CPU and GPU). The goal of coherency enable/disable
switch is to remove overhead of memory coherency when memory coherency is not
needed.

2. Why do we need a global coherency switch?

In order to support I/O commands from within EUs (Execution Units), Intel GEN
ISA (GEN Instruction Set Assembly) contains dedicated "send" instructions.
These send instructions provide several addressing models. One of these
addressing models (named "stateless") provides most flexible I/O using plain
virtual addresses (as opposed to buffer_handle+offset models). This "stateless"
model is similar to regular memory load/store operations available on typical
CPUs. Since this model provides I/O using arbitrary virtual addresses, it
enables algorithmic designs that are based on pointer-to-pointer (e.g. buffer
of pointers) concepts. For instance, it allows creating tree-like data
structures such as:
________________
| NODE1 |
| uint64_t data |
+----------------|
| NODE* | NODE*|
+--------+-------+
/ \
________________/ \________________
| NODE2 | | NODE3 |
| uint64_t data | | uint64_t data |
+----------------| +----------------|
| NODE* | NODE*| | NODE* | NODE*|
+--------+-------+ +--------+-------+

Please note that pointers inside such structures can point to memory locations
in different OCL allocations - e.g. NODE1 and NODE2 can reside in one OCL
allocation while NODE3 resides in a completely separate OCL allocation.
Additionally, such pointers can be shared with CPU (i.e. using SVM - Shared
Virtual Memory feature). Using pointers from different allocations doesn't
affect the stateless addressing model which even allows scattered reading from
different allocations at the same time (i.e. by utilizing SIMD-nature of send
instructions).

When it comes to coherency programming, send instructions in stateless model
can be encoded (at ISA level) to either use or disable coherency. However, for
generic OCL applications (such as example with tree-like data structure), OCL
compiler is not able to determine origin of memory pointed to by an arbitrary
pointer - i.e. is not able to track given pointer back to a specific
allocation. As such, it's not able to decide whether coherency is needed or not
for specific pointer (or for specific I/O instruction). As a result, compiler
encodes all stateless sends as coherent (doing otherwise would lead to
functional issues resulting from data corruption). Please note that it would be
possible to workaround this (e.g. based on allocations map and pointer bounds
checking prior to each I/O instruction) but the performance cost of such
workaround would be many times greater than the cost of keeping coherency
always enabled. As such, enabling/disabling memory coherency at GEN ISA level
is not feasible and alternative method is needed.

Such alternative solution is to have a global coherency switch that allows
disabling coherency for single (though entire) GPU submission. This is
beneficial because this way we:
* can enable (and pay for) coherency only in submissions that actually need
coherency (submissions that use CL_MEM_SVM_FINE_GRAIN_BUFFER resources)
* don't care about coherency at GEN ISA granularity (no performance impact)

3. Will coherency switch be used frequently?

There are scenarios that will require frequent toggling of the coherency
switch.
E.g. an application has two OCL compute kernels: kern_master and kern_worker.
kern_master uses, concurrently with CPU, some fine grain SVM resources
(CL_MEM_SVM_FINE_GRAIN_BUFFER). These resources contain descriptors of
computational work that needs to be executed. kern_master analyzes incoming
work descriptors and populates a plain OCL buffer (non-fine-grain) with payload
for kern_worker. Once kern_master is done, kern_worker kicks-in and processes
the payload that kern_master produced. These two kernels work in a loop, one
after another. Since only kern_master requires coherency, kern_worker should
not be forced to pay for it. This means that we need to have the ability to
toggle coherency switch on or off per each GPU submission:
(ENABLE COHERENCY) kern_master -> (DISABLE COHERENCY)kern_worker -> (ENABLE
COHERENCY) kern_master -> (DISABLE COHERENCY)kern_worker -> ...

v2: Fixed compilation warning.
v3: Refactored the patch to add IOCTL instead of exec flag.
v4: Renamed and documented the API flag. Used strict values.
Removed redundant GEM_WARN_ON()s. Improved adherence to the coding standard.
Introduced a macro for checking whether hardware supports the feature.
v5: Renamed some locals. Made the flag write to be lazy.
Updated comments to remove misconceptions. Added gen11 support.
v6: Moved the flag write to gen8_emit_flush_render(). Renamed some functions.
Moved all flags checking to one place. Added mutex check.

Cc: Joonas Lahtinen <***@linux.intel.com>
Cc: Tvrtko Ursulin <***@intel.com>
Cc: Chris Wilson <***@chris-wilson.co.uk>
Cc: Michal Winiarski <***@intel.com>

Bspec: 11419
Bspec: 19175
Signed-off-by: Tomasz Lis <***@intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 1 +
drivers/gpu/drm/i915/i915_gem_context.c | 29 +++++++++++++--
drivers/gpu/drm/i915/i915_gem_context.h | 17 +++++++++
drivers/gpu/drm/i915/intel_lrc.c | 66 ++++++++++++++++++++++++++++++++-
include/uapi/drm/i915_drm.h | 7 ++++
5 files changed, 115 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4fb9373..bae3999 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2524,6 +2524,7 @@ intel_info(const struct drm_i915_private *dev_priv)
#define HAS_EDRAM(dev_priv) (!!((dev_priv)->edram_cap & EDRAM_ENABLED))
#define HAS_WT(dev_priv) ((IS_HASWELL(dev_priv) || \
IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv))
+#define HAS_DATA_PORT_COHERENCY(dev_priv) (INTEL_GEN(dev_priv) >= 9)

#define HWS_NEEDS_PHYSICAL(dev_priv) ((dev_priv)->info.hws_needs_physical)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index b10770c..44ebc31 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -784,6 +784,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
+ struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_file_private *file_priv = file->driver_priv;
struct drm_i915_gem_context_param *args = data;
struct i915_gem_context *ctx;
@@ -804,10 +805,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
case I915_CONTEXT_PARAM_GTT_SIZE:
if (ctx->ppgtt)
args->value = ctx->ppgtt->vm.total;
- else if (to_i915(dev)->mm.aliasing_ppgtt)
- args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total;
+ else if (i915->mm.aliasing_ppgtt)
+ args->value = i915->mm.aliasing_ppgtt->vm.total;
else
- args->value = to_i915(dev)->ggtt.vm.total;
+ args->value = i915->ggtt.vm.total;
break;
case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
args->value = i915_gem_context_no_error_capture(ctx);
@@ -818,6 +819,12 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
case I915_CONTEXT_PARAM_PRIORITY:
args->value = ctx->sched.priority;
break;
+ case I915_CONTEXT_PARAM_DATA_PORT_COHERENCY:
+ if (!HAS_DATA_PORT_COHERENCY(i915))
+ ret = -ENODEV;
+ else
+ args->value = i915_gem_context_is_data_port_coherent(ctx);
+ break;
default:
ret = -EINVAL;
break;
@@ -830,6 +837,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
+ struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_file_private *file_priv = file->driver_priv;
struct drm_i915_gem_context_param *args = data;
struct i915_gem_context *ctx;
@@ -880,7 +888,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,

if (args->size)
ret = -EINVAL;
- else if (!(to_i915(dev)->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
+ else if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
ret = -ENODEV;
else if (priority > I915_CONTEXT_MAX_USER_PRIORITY ||
priority < I915_CONTEXT_MIN_USER_PRIORITY)
@@ -893,6 +901,19 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
}
break;

+ case I915_CONTEXT_PARAM_DATA_PORT_COHERENCY:
+ if (args->size)
+ ret = -EINVAL;
+ else if (!HAS_DATA_PORT_COHERENCY(i915))
+ ret = -ENODEV;
+ else if (args->value == 1)
+ i915_gem_context_set_data_port_coherent(ctx);
+ else if (args->value == 0)
+ i915_gem_context_clear_data_port_coherent(ctx);
+ else
+ ret = -EINVAL;
+ break;
+
default:
ret = -EINVAL;
break;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index b116e49..9312343 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -126,6 +126,8 @@ struct i915_gem_context {
#define CONTEXT_BANNABLE 3
#define CONTEXT_BANNED 4
#define CONTEXT_FORCE_SINGLE_SUBMISSION 5
+#define CONTEXT_DATA_PORT_COHERENT_REQUESTED 6
+#define CONTEXT_DATA_PORT_COHERENT_ACTIVE 7

/**
* @hw_id: - unique identifier for the context
@@ -257,6 +259,21 @@ static inline void i915_gem_context_set_force_single_submission(struct i915_gem_
__set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags);
}

+static inline bool i915_gem_context_is_data_port_coherent(struct i915_gem_context *ctx)
+{
+ return test_bit(CONTEXT_DATA_PORT_COHERENT_REQUESTED, &ctx->flags);
+}
+
+static inline void i915_gem_context_set_data_port_coherent(struct i915_gem_context *ctx)
+{
+ __set_bit(CONTEXT_DATA_PORT_COHERENT_REQUESTED, &ctx->flags);
+}
+
+static inline void i915_gem_context_clear_data_port_coherent(struct i915_gem_context *ctx)
+{
+ __clear_bit(CONTEXT_DATA_PORT_COHERENT_REQUESTED, &ctx->flags);
+}
+
static inline bool i915_gem_context_is_default(const struct i915_gem_context *c)
{
return c->user_handle == DEFAULT_CONTEXT_HANDLE;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6fef9d1..6a08e10 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -259,6 +259,63 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
ce->lrc_desc = desc;
}

+static int emit_set_data_port_coherency(struct i915_request *rq, bool enable)
+{
+ u32 *cs;
+ i915_reg_t reg;
+
+ GEM_BUG_ON(rq->engine->class != RENDER_CLASS);
+ GEM_BUG_ON(INTEL_GEN(rq->i915) < 9);
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ if (INTEL_GEN(rq->i915) >= 11)
+ reg = ICL_HDC_MODE;
+ else if (INTEL_GEN(rq->i915) >= 10)
+ reg = CNL_HDC_CHICKEN0;
+ else
+ reg = HDC_CHICKEN0;
+
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(reg);
+ /* Enabling coherency means disabling the bit which forces it off */
+ if (enable)
+ *cs++ = _MASKED_BIT_DISABLE(HDC_FORCE_NON_COHERENT);
+ else
+ *cs++ = _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT);
+ *cs++ = MI_NOOP;
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static int
+intel_lr_context_update_data_port_coherency(struct i915_request *rq)
+{
+ struct i915_gem_context *ctx = rq->gem_context;
+ bool enable = test_bit(CONTEXT_DATA_PORT_COHERENT_REQUESTED, &ctx->flags);
+ int ret;
+
+ lockdep_assert_held(&rq->i915->drm.struct_mutex);
+
+ if (test_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags) == enable)
+ return 0;
+
+ ret = emit_set_data_port_coherency(rq, enable);
+
+ if (!ret) {
+ if (enable)
+ __set_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags);
+ else
+ __clear_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags);
+ }
+
+ return ret;
+}
+
static struct i915_priolist *
lookup_priolist(struct intel_engine_cs *engine, int prio)
{
@@ -2133,7 +2190,7 @@ static int gen8_emit_flush_render(struct i915_request *request,
i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
bool vf_flush_wa = false, dc_flush_wa = false;
u32 *cs, flags = 0;
- int len;
+ int err, len;

flags |= PIPE_CONTROL_CS_STALL;

@@ -2164,6 +2221,13 @@ static int gen8_emit_flush_render(struct i915_request *request,
/* WaForGAMHang:kbl */
if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
dc_flush_wa = true;
+
+ /* Emit the switch of data port coherency state if needed */
+ err = intel_lr_context_update_data_port_coherency(request);
+ if (GEM_WARN_ON(err)) {
+ DRM_DEBUG("Data Port Coherency toggle failed.\n");
+ return err;
+ }
}

len = 6;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7f5634c..6ece759 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1456,6 +1456,13 @@ struct drm_i915_gem_context_param {
#define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */
#define I915_CONTEXT_DEFAULT_PRIORITY 0
#define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */
+/*
+ * When data port level coherency is enabled, the GPU will update memory
+ * buffers shared with CPU, by forcing internal cache units to send memory
+ * writes to higher level caches faster. Enabling data port coherency has
+ * a performance cost.
+ */
+#define I915_CONTEXT_PARAM_DATA_PORT_COHERENCY 0x7
__u64 value;
};
--
2.7.4
Tvrtko Ursulin
2018-07-16 13:35:42 UTC
Permalink
Post by Tomasz Lis
The patch adds a parameter to control the data port coherency functionality
on a per-context level. When the IOCTL is called, a command to switch data
port coherency state is added to the ordered list. All prior requests are
executed on old coherency settings, and all exec requests after the IOCTL
will use new settings.
The OpenCL driver developers requested a functionality to control cache
coherency at data port level. Keeping the coherency at that level is disabled
by default due to its performance costs. OpenCL driver is planning to
enable it for a small subset of submissions, when such functionality is
required. Below are answers to basic questions explaining the background:
1. Why do we need a coherency enable/disable switch for memory that is shared
between CPU and GEN (GPU)?
Memory coherency between CPU and GEN, while being a great feature that enables
CL_MEM_SVM_FINE_GRAIN_BUFFER OCL capability on Intel GEN architecture, adds
overhead related to tracking (snooping) memory inside different cache units
(L1$, L2$, L3$, LLC$, etc.). At the same time, minority of modern OCL
applications actually use CL_MEM_SVM_FINE_GRAIN_BUFFER (and hence require
memory coherency between CPU and GPU). The goal of coherency enable/disable
switch is to remove overhead of memory coherency when memory coherency is not
needed.
2. Why do we need a global coherency switch?
In order to support I/O commands from within EUs (Execution Units), Intel GEN
ISA (GEN Instruction Set Assembly) contains dedicated "send" instructions.
These send instructions provide several addressing models. One of these
addressing models (named "stateless") provides most flexible I/O using plain
virtual addresses (as opposed to buffer_handle+offset models). This "stateless"
model is similar to regular memory load/store operations available on typical
CPUs. Since this model provides I/O using arbitrary virtual addresses, it
enables algorithmic designs that are based on pointer-to-pointer (e.g. buffer
of pointers) concepts. For instance, it allows creating tree-like data
structures, for example:
________________
| NODE1 |
| uint64_t data |
+----------------|
| NODE* | NODE*|
+--------+-------+
/ \
________________/ \________________
| NODE2 | | NODE3 |
| uint64_t data | | uint64_t data |
+----------------| +----------------|
| NODE* | NODE*| | NODE* | NODE*|
+--------+-------+ +--------+-------+
Please note that pointers inside such structures can point to memory locations
in different OCL allocations - e.g. NODE1 and NODE2 can reside in one OCL
allocation while NODE3 resides in a completely separate OCL allocation.
Additionally, such pointers can be shared with CPU (i.e. using SVM - Shared
Virtual Memory feature). Using pointers from different allocations doesn't
affect the stateless addressing model which even allows scattered reading from
different allocations at the same time (i.e. by utilizing SIMD-nature of send
instructions).
When it comes to coherency programming, send instructions in stateless model
can be encoded (at ISA level) to either use or disable coherency. However, for
generic OCL applications (such as example with tree-like data structure), OCL
compiler is not able to determine origin of memory pointed to by an arbitrary
pointer - i.e. is not able to track given pointer back to a specific
allocation. As such, it's not able to decide whether coherency is needed or not
for specific pointer (or for specific I/O instruction). As a result, compiler
encodes all stateless sends as coherent (doing otherwise would lead to
functional issues resulting from data corruption). Please note that it would be
possible to workaround this (e.g. based on allocations map and pointer bounds
checking prior to each I/O instruction) but the performance cost of such
workaround would be many times greater than the cost of keeping coherency
always enabled. As such, enabling/disabling memory coherency at GEN ISA level
is not feasible and alternative method is needed.
Such alternative solution is to have a global coherency switch that allows
disabling coherency for a single (though entire) GPU submission. This way
the OCL driver:
* can enable (and pay for) coherency only in submissions that actually need
coherency (submissions that use CL_MEM_SVM_FINE_GRAIN_BUFFER resources)
* don't care about coherency at GEN ISA granularity (no performance impact)
3. Will coherency switch be used frequently?
There are scenarios that will require frequent toggling of the coherency
switch.
E.g. an application has two OCL compute kernels: kern_master and kern_worker.
kern_master uses, concurrently with CPU, some fine grain SVM resources
(CL_MEM_SVM_FINE_GRAIN_BUFFER). These resources contain descriptors of
computational work that needs to be executed. kern_master analyzes incoming
work descriptors and populates a plain OCL buffer (non-fine-grain) with payload
for kern_worker. Once kern_master is done, kern_worker kicks-in and processes
the payload that kern_master produced. These two kernels work in a loop, one
after another. Since only kern_master requires coherency, kern_worker should
not be forced to pay for it. This means that we need to have the ability to
toggle coherency switch on or off per each GPU submission:
(ENABLE COHERENCY) kern_master -> (DISABLE COHERENCY)kern_worker -> (ENABLE
COHERENCY) kern_master -> (DISABLE COHERENCY)kern_worker -> ...
v2: Fixed compilation warning.
v3: Refactored the patch to add IOCTL instead of exec flag.
v4: Renamed and documented the API flag. Used strict values.
Removed redundant GEM_WARN_ON()s. Improved to coding standard.
Introduced a macro for checking whether hardware supports the feature.
v5: Renamed some locals. Made the flag write to be lazy.
Updated comments to remove misconceptions. Added gen11 support.
v6: Moved the flag write to gen8_emit_flush_render(). Renamed some functions.
Moved all flags checking to one place. Added mutex check.
Bspec: 11419
Bspec: 19175
---
drivers/gpu/drm/i915/i915_drv.h | 1 +
drivers/gpu/drm/i915/i915_gem_context.c | 29 +++++++++++++--
drivers/gpu/drm/i915/i915_gem_context.h | 17 +++++++++
drivers/gpu/drm/i915/intel_lrc.c | 66 ++++++++++++++++++++++++++++++++-
include/uapi/drm/i915_drm.h | 7 ++++
5 files changed, 115 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4fb9373..bae3999 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2524,6 +2524,7 @@ intel_info(const struct drm_i915_private *dev_priv)
#define HAS_EDRAM(dev_priv) (!!((dev_priv)->edram_cap & EDRAM_ENABLED))
#define HAS_WT(dev_priv) ((IS_HASWELL(dev_priv) || \
IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv))
+#define HAS_DATA_PORT_COHERENCY(dev_priv) (INTEL_GEN(dev_priv) >= 9)
#define HWS_NEEDS_PHYSICAL(dev_priv) ((dev_priv)->info.hws_needs_physical)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index b10770c..44ebc31 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -784,6 +784,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
+ struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_file_private *file_priv = file->driver_priv;
struct drm_i915_gem_context_param *args = data;
struct i915_gem_context *ctx;
@@ -804,10 +805,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
if (ctx->ppgtt)
args->value = ctx->ppgtt->vm.total;
- else if (to_i915(dev)->mm.aliasing_ppgtt)
- args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total;
+ else if (i915->mm.aliasing_ppgtt)
+ args->value = i915->mm.aliasing_ppgtt->vm.total;
else
- args->value = to_i915(dev)->ggtt.vm.total;
+ args->value = i915->ggtt.vm.total;
break;
args->value = i915_gem_context_no_error_capture(ctx);
@@ -818,6 +819,12 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
args->value = ctx->sched.priority;
break;
+ if (!HAS_DATA_PORT_COHERENCY(i915))
+ ret = -ENODEV;
+ else
+ args->value = i915_gem_context_is_data_port_coherent(ctx);
+ break;
ret = -EINVAL;
break;
@@ -830,6 +837,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
+ struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_file_private *file_priv = file->driver_priv;
struct drm_i915_gem_context_param *args = data;
struct i915_gem_context *ctx;
@@ -880,7 +888,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
if (args->size)
ret = -EINVAL;
- else if (!(to_i915(dev)->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
+ else if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
ret = -ENODEV;
else if (priority > I915_CONTEXT_MAX_USER_PRIORITY ||
priority < I915_CONTEXT_MIN_USER_PRIORITY)
@@ -893,6 +901,19 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
}
break;
+ if (args->size)
+ ret = -EINVAL;
+ else if (!HAS_DATA_PORT_COHERENCY(i915))
+ ret = -ENODEV;
+ else if (args->value == 1)
+ i915_gem_context_set_data_port_coherent(ctx);
+ else if (args->value == 0)
+ i915_gem_context_clear_data_port_coherent(ctx);
+ else
+ ret = -EINVAL;
+ break;
+
ret = -EINVAL;
break;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index b116e49..9312343 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -126,6 +126,8 @@ struct i915_gem_context {
#define CONTEXT_BANNABLE 3
#define CONTEXT_BANNED 4
#define CONTEXT_FORCE_SINGLE_SUBMISSION 5
+#define CONTEXT_DATA_PORT_COHERENT_REQUESTED 6
+#define CONTEXT_DATA_PORT_COHERENT_ACTIVE 7
/**
@@ -257,6 +259,21 @@ static inline void i915_gem_context_set_force_single_submission(struct i915_gem_
__set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags);
}
+static inline bool i915_gem_context_is_data_port_coherent(struct i915_gem_context *ctx)
+{
+ return test_bit(CONTEXT_DATA_PORT_COHERENT_REQUESTED, &ctx->flags);
+}
+
+static inline void i915_gem_context_set_data_port_coherent(struct i915_gem_context *ctx)
+{
+ __set_bit(CONTEXT_DATA_PORT_COHERENT_REQUESTED, &ctx->flags);
+}
+
+static inline void i915_gem_context_clear_data_port_coherent(struct i915_gem_context *ctx)
+{
+ __clear_bit(CONTEXT_DATA_PORT_COHERENT_REQUESTED, &ctx->flags);
+}
+
static inline bool i915_gem_context_is_default(const struct i915_gem_context *c)
{
return c->user_handle == DEFAULT_CONTEXT_HANDLE;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6fef9d1..6a08e10 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -259,6 +259,63 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
ce->lrc_desc = desc;
}
+static int emit_set_data_port_coherency(struct i915_request *rq, bool enable)
+{
+ u32 *cs;
+ i915_reg_t reg;
+
+ GEM_BUG_ON(rq->engine->class != RENDER_CLASS);
+ GEM_BUG_ON(INTEL_GEN(rq->i915) < 9);
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ if (INTEL_GEN(rq->i915) >= 11)
+ reg = ICL_HDC_MODE;
+ else if (INTEL_GEN(rq->i915) >= 10)
+ reg = CNL_HDC_CHICKEN0;
+ else
+ reg = HDC_CHICKEN0;
+
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(reg);
+ /* Enabling coherency means disabling the bit which forces it off */
+ if (enable)
+ *cs++ = _MASKED_BIT_DISABLE(HDC_FORCE_NON_COHERENT);
+ else
+ *cs++ = _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT);
+ *cs++ = MI_NOOP;
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static int
+intel_lr_context_update_data_port_coherency(struct i915_request *rq)
+{
+ struct i915_gem_context *ctx = rq->gem_context;
+ bool enable = test_bit(CONTEXT_DATA_PORT_COHERENT_REQUESTED, &ctx->flags);
+ int ret;
+
+ lockdep_assert_held(&rq->i915->drm.struct_mutex);
+
+ if (test_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags) == enable)
+ return 0;
+
+ ret = emit_set_data_port_coherency(rq, enable);
+
+ if (!ret) {
+ if (enable)
+ __set_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags);
+ else
+ __clear_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags);
+ }
+
+ return ret;
+}
+
static struct i915_priolist *
lookup_priolist(struct intel_engine_cs *engine, int prio)
{
@@ -2133,7 +2190,7 @@ static int gen8_emit_flush_render(struct i915_request *request,
i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
bool vf_flush_wa = false, dc_flush_wa = false;
u32 *cs, flags = 0;
- int len;
+ int err, len;
flags |= PIPE_CONTROL_CS_STALL;
@@ -2164,6 +2221,13 @@ static int gen8_emit_flush_render(struct i915_request *request,
/* WaForGAMHang:kbl */
if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
dc_flush_wa = true;
+
+ /* Emit the switch of data port coherency state if needed */
+ err = intel_lr_context_update_data_port_coherency(request);
+ if (GEM_WARN_ON(err)) {
+ DRM_DEBUG("Data Port Coherency toggle failed.\n");
+ return err;
+ }
}
len = 6;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7f5634c..6ece759 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1456,6 +1456,13 @@ struct drm_i915_gem_context_param {
#define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */
#define I915_CONTEXT_DEFAULT_PRIORITY 0
#define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */
+/*
+ * When data port level coherency is enabled, the GPU will update memory
+ * buffers shared with CPU, by forcing internal cache units to send memory
+ * writes to higher level caches faster. Enabling data port coherency has
+ * a performance cost.
+ */
+#define I915_CONTEXT_PARAM_DATA_PORT_COHERENCY 0x7
__u64 value;
};
Looks good to me!

Reviewed-by: Tvrtko Ursulin <***@intel.com>

Regards,

Tvrtko
Joonas Lahtinen
2018-07-18 13:24:51 UTC
Permalink
Quoting Tomasz Lis (2018-07-16 16:07:16)
Post by Tomasz Lis
+static int emit_set_data_port_coherency(struct i915_request *rq, bool enable)
+{
+ u32 *cs;
+ i915_reg_t reg;
+
+ GEM_BUG_ON(rq->engine->class != RENDER_CLASS);
+ GEM_BUG_ON(INTEL_GEN(rq->i915) < 9);
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ if (INTEL_GEN(rq->i915) >= 11)
+ reg = ICL_HDC_MODE;
+ else if (INTEL_GEN(rq->i915) >= 10)
+ reg = CNL_HDC_CHICKEN0;
+ else
+ reg = HDC_CHICKEN0;
+
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(reg);
+ /* Enabling coherency means disabling the bit which forces it off */
This comment is still spurious, please get rid of the habit of writing
comments about "what" the code is doing, useful comments should be
limited to "why", which is quite self explanatory here, that's the way
the register is.
Post by Tomasz Lis
+static int
+intel_lr_context_update_data_port_coherency(struct i915_request *rq)
+{
+ struct i915_gem_context *ctx = rq->gem_context;
+ bool enable = test_bit(CONTEXT_DATA_PORT_COHERENT_REQUESTED, &ctx->flags);
+ int ret;
+
+ lockdep_assert_held(&rq->i915->drm.struct_mutex);
+
+ if (test_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags) == enable)
+ return 0;
+
+ ret = emit_set_data_port_coherency(rq, enable);
+
+ if (!ret) {
+ if (enable)
+ __set_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags);
+ else
+ __clear_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags);
+ }
Do we have indication that the hardware feature will be unreliable in
responding to the requests? I don't think you need the differentiation
of requested vs. active. If there is an error, we can just report back to
the user as a failed IOCTL. Now it adds unnecessary complication for no benefit.
Post by Tomasz Lis
@@ -2164,6 +2221,13 @@ static int gen8_emit_flush_render(struct i915_request *request,
/* WaForGAMHang:kbl */
if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
dc_flush_wa = true;
+
+ /* Emit the switch of data port coherency state if needed */
Ditto for spurious comment, just about what the code does.
Post by Tomasz Lis
+++ b/include/uapi/drm/i915_drm.h
@@ -1456,6 +1456,13 @@ struct drm_i915_gem_context_param {
#define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */
#define I915_CONTEXT_DEFAULT_PRIORITY 0
#define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */
+/*
+ * When data port level coherency is enabled, the GPU will update memory
+ * buffers shared with CPU, by forcing internal cache units to send memory
+ * writes to higher level caches faster. Enabling data port coherency has
+ * a performance cost.
+ */
I was under impression this is enabled by default and it can be disabled
for a performance optimization?

Regards, Joonas
Tvrtko Ursulin
2018-07-18 14:42:13 UTC
Permalink
Post by Joonas Lahtinen
Quoting Tomasz Lis (2018-07-16 16:07:16)
Post by Tomasz Lis
+static int emit_set_data_port_coherency(struct i915_request *rq, bool enable)
+{
+ u32 *cs;
+ i915_reg_t reg;
+
+ GEM_BUG_ON(rq->engine->class != RENDER_CLASS);
+ GEM_BUG_ON(INTEL_GEN(rq->i915) < 9);
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ if (INTEL_GEN(rq->i915) >= 11)
+ reg = ICL_HDC_MODE;
+ else if (INTEL_GEN(rq->i915) >= 10)
+ reg = CNL_HDC_CHICKEN0;
+ else
+ reg = HDC_CHICKEN0;
+
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(reg);
+ /* Enabling coherency means disabling the bit which forces it off */
This comment is still spurious, please get rid of the habit of writing
comments about "what" the code is doing, useful comments should be
limited to "why", which is quite self explanatory here, that's the way
the register is.
Post by Tomasz Lis
+static int
+intel_lr_context_update_data_port_coherency(struct i915_request *rq)
+{
+ struct i915_gem_context *ctx = rq->gem_context;
+ bool enable = test_bit(CONTEXT_DATA_PORT_COHERENT_REQUESTED, &ctx->flags);
+ int ret;
+
+ lockdep_assert_held(&rq->i915->drm.struct_mutex);
+
+ if (test_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags) == enable)
+ return 0;
+
+ ret = emit_set_data_port_coherency(rq, enable);
+
+ if (!ret) {
+ if (enable)
+ __set_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags);
+ else
+ __clear_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags);
+ }
Do we have indication that the hardware feature will be unreliable in
responding to the requests? I don't think you need the differentiation
of requested vs. active. If there is an error, we can just report back to
the user as a failed IOCTL. Now it adds unnecessary complication for no benefit.
Requested vs active is for implementing the lazy emit.

AFAIR it does propagate the error out of execbuf (although we never ever
expect it to happen), and this is just to keep the internal
house-keeping in sync.

Regards,

Tvrtko
Post by Joonas Lahtinen
Post by Tomasz Lis
@@ -2164,6 +2221,13 @@ static int gen8_emit_flush_render(struct i915_request *request,
/* WaForGAMHang:kbl */
if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
dc_flush_wa = true;
+
+ /* Emit the switch of data port coherency state if needed */
Ditto for spurious comment, just about what the code does.
Post by Tomasz Lis
+++ b/include/uapi/drm/i915_drm.h
@@ -1456,6 +1456,13 @@ struct drm_i915_gem_context_param {
#define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */
#define I915_CONTEXT_DEFAULT_PRIORITY 0
#define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */
+/*
+ * When data port level coherency is enabled, the GPU will update memory
+ * buffers shared with CPU, by forcing internal cache units to send memory
+ * writes to higher level caches faster. Enabling data port coherency has
+ * a performance cost.
+ */
I was under impression this is enabled by default and it can be disabled
for a performance optimization?
Regards, Joonas
_______________________________________________
Intel-gfx mailing list
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Lis, Tomasz
2018-07-18 15:28:32 UTC
Permalink
Post by Tvrtko Ursulin
Post by Joonas Lahtinen
Quoting Tomasz Lis (2018-07-16 16:07:16)
Post by Tomasz Lis
+static int emit_set_data_port_coherency(struct i915_request *rq, bool enable)
+{
+       u32 *cs;
+       i915_reg_t reg;
+
+       GEM_BUG_ON(rq->engine->class != RENDER_CLASS);
+       GEM_BUG_ON(INTEL_GEN(rq->i915) < 9);
+
+       cs = intel_ring_begin(rq, 4);
+       if (IS_ERR(cs))
+               return PTR_ERR(cs);
+
+       if (INTEL_GEN(rq->i915) >= 11)
+               reg = ICL_HDC_MODE;
+       else if (INTEL_GEN(rq->i915) >= 10)
+               reg = CNL_HDC_CHICKEN0;
+       else
+               reg = HDC_CHICKEN0;
+
+       *cs++ = MI_LOAD_REGISTER_IMM(1);
+       *cs++ = i915_mmio_reg_offset(reg);
+       /* Enabling coherency means disabling the bit which forces
it off */
This comment is still spurious, please get rid of the habit of writing
comments about "what" the code is doing, useful comments should be
limited to "why", which is quite self explanatory here, that's the way
the register is.
Ok, I will read the related doc:
https://www.kernel.org/doc/html/v4.10/process/coding-style.html#commenting
Post by Tvrtko Ursulin
Post by Joonas Lahtinen
Post by Tomasz Lis
+static int
+intel_lr_context_update_data_port_coherency(struct i915_request *rq)
+{
+       struct i915_gem_context *ctx = rq->gem_context;
+       bool enable = test_bit(CONTEXT_DATA_PORT_COHERENT_REQUESTED,
&ctx->flags);
+       int ret;
+
+ lockdep_assert_held(&rq->i915->drm.struct_mutex);
+
+       if (test_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags)
== enable)
+               return 0;
+
+       ret = emit_set_data_port_coherency(rq, enable);
+
+       if (!ret) {
+               if (enable)
+ __set_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags);
+               else
+ __clear_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags);
+       }
Do we have indication that the hardware feature will be unreliable in
responding to the requests? I don't think you need the differentiation
of requested vs. active. If there is an error, we can just report back to
the user as a failed IOCTL. Now it adds unnecessary complication for no benefit.
Requested vs active is for implementing the lazy emit.
AFAIR it does propagate the error out of execbuf (although we never
ever expect it to happen), and this is just to keep the internal
house-keeping in sync.
Regards,
Tvrtko
Post by Joonas Lahtinen
Post by Tomasz Lis
@@ -2164,6 +2221,13 @@ static int gen8_emit_flush_render(struct i915_request *request,
                 /* WaForGAMHang:kbl */
                 if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
                         dc_flush_wa = true;
+
+               /* Emit the switch of data port coherency state if
needed */
Ditto for spurious comment, just about what the code does.
Post by Tomasz Lis
+++ b/include/uapi/drm/i915_drm.h
@@ -1456,6 +1456,13 @@ struct drm_i915_gem_context_param {
  #define   I915_CONTEXT_MAX_USER_PRIORITY       1023 /* inclusive */
  #define   I915_CONTEXT_DEFAULT_PRIORITY                0
  #define   I915_CONTEXT_MIN_USER_PRIORITY       -1023 /* inclusive */
+/*
+ * When data port level coherency is enabled, the GPU will update memory
+ * buffers shared with CPU, by forcing internal cache units to send memory
+ * writes to higher level caches faster. Enabling data port
coherency has
+ * a performance cost.
+ */
I was under impression this is enabled by default and it can be disabled
for a performance optimization?
This is true, coherency is kept by default. We disable it as a
workaround: performance-related for gen11, and due to minor hardware
issue on previous platforms. See WaForceEnableNonCoherent.
-Tomasz
Post by Tvrtko Ursulin
Post by Joonas Lahtinen
Regards, Joonas
_______________________________________________
Intel-gfx mailing list
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Joonas Lahtinen
2018-07-19 07:12:42 UTC
Permalink
Quoting Lis, Tomasz (2018-07-18 18:28:32)
Post by Lis, Tomasz
Post by Joonas Lahtinen
Quoting Tomasz Lis (2018-07-16 16:07:16)
<SNIP>
Post by Lis, Tomasz
Post by Joonas Lahtinen
Post by Tomasz Lis
+++ b/include/uapi/drm/i915_drm.h
@@ -1456,6 +1456,13 @@ struct drm_i915_gem_context_param {
  #define   I915_CONTEXT_MAX_USER_PRIORITY       1023 /* inclusive */
  #define   I915_CONTEXT_DEFAULT_PRIORITY                0
  #define   I915_CONTEXT_MIN_USER_PRIORITY       -1023 /* inclusive */
+/*
+ * When data port level coherency is enabled, the GPU will update memory
+ * buffers shared with CPU, by forcing internal cache units to send memory
+ * writes to higher level caches faster. Enabling data port coherency has
+ * a performance cost.
+ */
I was under impression this is enabled by default and it can be disabled
for a performance optimization?
This is true, coherency is kept by default. We disable it as a
workaround: performance-related for gen11, and due to minor hardware
issue on previous platforms. See WaForceEnableNonCoherent.
Ok, then you definitely want to rephrase the comment to bake that
information in it. Now it sounds like it needs to be turned on to have
coherency.

Regards, Joonas
Lis, Tomasz
2018-07-19 15:10:00 UTC
Permalink
Post by Joonas Lahtinen
Quoting Lis, Tomasz (2018-07-18 18:28:32)
Post by Lis, Tomasz
Post by Joonas Lahtinen
Quoting Tomasz Lis (2018-07-16 16:07:16)
<SNIP>
Post by Lis, Tomasz
Post by Joonas Lahtinen
Post by Tomasz Lis
+++ b/include/uapi/drm/i915_drm.h
@@ -1456,6 +1456,13 @@ struct drm_i915_gem_context_param {
  #define   I915_CONTEXT_MAX_USER_PRIORITY       1023 /* inclusive */
  #define   I915_CONTEXT_DEFAULT_PRIORITY                0
  #define   I915_CONTEXT_MIN_USER_PRIORITY       -1023 /* inclusive */
+/*
+ * When data port level coherency is enabled, the GPU will update memory
+ * buffers shared with CPU, by forcing internal cache units to send memory
+ * writes to higher level caches faster. Enabling data port coherency has
+ * a performance cost.
+ */
I was under impression this is enabled by default and it can be disabled
for a performance optimization?
This is true, coherency is kept by default. We disable it as a
workaround: performance-related for gen11, and due to minor hardware
issue on previous platforms. See WaForceEnableNonCoherent.
Ok, then you definitely want to rephrase the comment to bake that
information in it. Now it sounds like it needs to be turned on to have
coherency.
I'm not sure if I understand what you're asking for.
Should I emphasize that the feature is disabled unless the flag is set?
This seem obvious...
Or should I provide the reason why it is disabled on specific platforms?
This should probably be done within workaround setup, not in user api
definition. Or maybe it's enough to have it in Bspec? Bspec links are
provided in the patch.
Or should I just mention the workaround name?
-Tomasz
Post by Joonas Lahtinen
Regards, Joonas
Patchwork
2018-07-16 14:36:38 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev6)
URL : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
26cf0c5fcc46 drm/i915: Add IOCTL Param to control data port coherency.
-:15: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#15:
coherency at data port level. Keeping the coherency at that level is disabled

total: 0 errors, 1 warnings, 0 checks, 199 lines checked
Patchwork
2018-07-16 14:37:34 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev6)
URL : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Commit: drm/i915: Add IOCTL Param to control data port coherency.
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3653:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3654:16: warning: expression using sizeof(void)
Patchwork
2018-07-16 14:58:14 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev6)
URL : https://patchwork.freedesktop.org/series/40747/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4493 -> Patchwork_9676 =

== Summary - SUCCESS ==

No regressions found.

External URL: https://patchwork.freedesktop.org/api/1.0/series/40747/revisions/6/mbox/

== Known issues ==

Here are the changes found in Patchwork_9676 that come from known issues:

=== IGT changes ===

==== Issues hit ====

***@drv_selftest@live_hangcheck:
fi-skl-guc: PASS -> DMESG-FAIL (fdo#107174)

***@kms_chamelium@dp-crc-fast:
fi-kbl-7500u: PASS -> DMESG-FAIL (fdo#103841)

***@kms_frontbuffer_tracking@basic:
fi-hsw-peppy: PASS -> DMESG-FAIL (fdo#102614, fdo#106103)


==== Possible fixes ====

***@drv_selftest@live_hangcheck:
fi-bdw-5557u: DMESG-FAIL (fdo#106560) -> PASS

***@kms_pipe_crc_basic@suspend-read-crc-pipe-b:
fi-snb-2520m: INCOMPLETE (fdo#103713) -> PASS


fdo#102614 https://bugs.freedesktop.org/show_bug.cgi?id=102614
fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
fdo#103841 https://bugs.freedesktop.org/show_bug.cgi?id=103841
fdo#106103 https://bugs.freedesktop.org/show_bug.cgi?id=106103
fdo#106560 https://bugs.freedesktop.org/show_bug.cgi?id=106560
fdo#107174 https://bugs.freedesktop.org/show_bug.cgi?id=107174


== Participating hosts (44 -> 41) ==

Additional (1): fi-skl-6700hq
Missing (4): fi-ctg-p8600 fi-ilk-m540 fi-byt-squawks fi-hsw-4200u


== Build changes ==

* Linux: CI_DRM_4493 -> Patchwork_9676

CI_DRM_4493: c69b4c1274cccaa270c1e4daa68228724c80603a @ git://anongit.freedesktop.org/gfx-ci/linux
IGT_4558: d8e97e1710b27a3931a1c53d1dd88c0e709c085b @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
Patchwork_9676: 26cf0c5fcc461bb5aca97673189ba1b329385a76 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

26cf0c5fcc46 drm/i915: Add IOCTL Param to control data port coherency.

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9676/issues.html
Patchwork
2018-07-16 19:26:18 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev6)
URL : https://patchwork.freedesktop.org/series/40747/
State : failure

== Summary ==

= CI Bug Log - changes from CI_DRM_4493_full -> Patchwork_9676_full =

== Summary - FAILURE ==

Serious unknown changes coming with Patchwork_9676_full absolutely need to be
verified manually.

If you think the reported changes have nothing to do with the changes
introduced in Patchwork_9676_full, please notify your bug team to allow them
to document this new failure mode, which will reduce false positives in CI.



== Possible new issues ==

Here are the unknown changes that may have been introduced in Patchwork_9676_full:

=== IGT changes ===

==== Possible regressions ====

***@gem_ctx_param@invalid-param-get:
shard-apl: PASS -> FAIL +1
shard-glk: PASS -> FAIL +1

***@gem_ctx_param@invalid-param-set:
shard-kbl: PASS -> FAIL +1
shard-hsw: PASS -> FAIL +1
shard-snb: PASS -> FAIL +1

***@gem_exec_schedule@preempt-hang-render:
shard-glk: NOTRUN -> DMESG-FAIL


==== Warnings ====

***@gem_exec_schedule@deep-render:
shard-kbl: SKIP -> PASS


== Known issues ==

Here are the changes found in Patchwork_9676_full that come from known issues:

=== IGT changes ===

==== Issues hit ====

***@drv_missed_irq:
shard-glk: NOTRUN -> INCOMPLETE (k.org#198133, fdo#103359)

***@kms_flip@2x-flip-vs-expired-vblank-interruptible:
shard-glk: PASS -> FAIL (fdo#105363)


==== Possible fixes ====

***@kms_cursor_legacy@cursor-vs-flip-toggle:
shard-hsw: FAIL (fdo#103355) -> PASS

***@kms_flip@2x-dpms-vs-vblank-race-interruptible:
shard-hsw: FAIL (fdo#103060) -> PASS +2

***@kms_flip@2x-plain-flip-fb-recreate:
shard-glk: FAIL (fdo#100368) -> PASS

***@kms_flip@dpms-vs-vblank-race:
shard-kbl: FAIL (fdo#103060) -> PASS

***@kms_flip@dpms-vs-vblank-race-interruptible:
shard-glk: FAIL (fdo#103060) -> PASS

***@kms_flip@flip-vs-expired-vblank:
shard-glk: FAIL (fdo#105363, fdo#102887) -> PASS


fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887
fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
fdo#103355 https://bugs.freedesktop.org/show_bug.cgi?id=103355
fdo#103359 https://bugs.freedesktop.org/show_bug.cgi?id=103359
fdo#105363 https://bugs.freedesktop.org/show_bug.cgi?id=105363
k.org#198133 https://bugzilla.kernel.org/show_bug.cgi?id=198133


== Participating hosts (5 -> 5) ==

No changes in participating hosts


== Build changes ==

* Linux: CI_DRM_4493 -> Patchwork_9676

CI_DRM_4493: c69b4c1274cccaa270c1e4daa68228724c80603a @ git://anongit.freedesktop.org/gfx-ci/linux
IGT_4558: d8e97e1710b27a3931a1c53d1dd88c0e709c085b @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
Patchwork_9676: 26cf0c5fcc461bb5aca97673189ba1b329385a76 @ git://anongit.freedesktop.org/gfx-ci/linux
piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9676/shards.html
Tomasz Lis
2018-10-15 17:29:18 UTC
Permalink
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.

Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

v2: Added needs_preempt_context() change so that it is not created when
preempt-to-idle is supported. (Chris)
Updated setting HWACK flag so that it is cleared after
preempt-to-idle. (Chris, Daniele)
Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)

v3: Fixed needs_preempt_context() change. (Chris)
Merged preemption trigger functions to one. (Chris)
Fixed context state to not assume COMPLETED_MASK after preemption,
since idle-to-idle case will not have it set.

v4: Simplified needs_preempt_context() change. (Daniele)
Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)

v5: Renamed inject_preempt_context(). (Daniele)
Removed duplicated GEM_BUG_ON() on HWACK (Daniele)

Bspec: 18922
Cc: Joonas Lahtinen <***@linux.intel.com>
Cc: Chris Wilson <***@chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <***@intel.com>
Cc: Michal Winiarski <***@intel.com>
Cc: Mika Kuoppala <***@intel.com>
Reviewed-by: Daniele Ceraolo Spurio <***@intel.com>
Signed-off-by: Tomasz Lis <***@intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 2 +
drivers/gpu/drm/i915/i915_gem_context.c | 3 +-
drivers/gpu/drm/i915/i915_pci.c | 3 +-
drivers/gpu/drm/i915/intel_device_info.h | 1 +
drivers/gpu/drm/i915/intel_lrc.c | 109 +++++++++++++++++++++----------
drivers/gpu/drm/i915/intel_lrc.h | 1 +
6 files changed, 84 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3017ef0..4817438 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2597,6 +2597,8 @@ intel_info(const struct drm_i915_private *dev_priv)
((dev_priv)->info.has_logical_ring_elsq)
#define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+ ((dev_priv)->info.has_hw_preempt_to_idle)

#define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 8cbe580..98ca20e 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -529,7 +529,8 @@ static void init_contexts(struct drm_i915_private *i915)

static bool needs_preempt_context(struct drm_i915_private *i915)
{
- return HAS_LOGICAL_RING_PREEMPTION(i915);
+ return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+ !HAS_HW_PREEMPT_TO_IDLE(i915);
}

int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 0a05cc7..f708d97 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -597,7 +597,8 @@ static const struct intel_device_info intel_cannonlake_info = {
GEN10_FEATURES, \
GEN(11), \
.ddb_size = 2048, \
- .has_logical_ring_elsq = 1
+ .has_logical_ring_elsq = 1, \
+ .has_hw_preempt_to_idle = 1

static const struct intel_device_info intel_icelake_11_info = {
GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index af70026..7dcf0fd 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -104,6 +104,7 @@ enum intel_ppgtt {
func(has_logical_ring_contexts); \
func(has_logical_ring_elsq); \
func(has_logical_ring_preemption); \
+ func(has_hw_preempt_to_idle); \
func(has_overlay); \
func(has_pooled_eu); \
func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ff0e2b3..4c2bfed 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -155,6 +155,7 @@
#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
#define GEN8_CTX_STATUS_COMPLETE (1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE (1 << 29)

#define GEN8_CTX_STATUS_COMPLETED_MASK \
(GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -488,29 +489,49 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq)
port_set(port, port_pack(i915_request_get(rq), port_count(port)));
}

-static void inject_preempt_context(struct intel_engine_cs *engine)
+static void execlist_send_preempt_to_idle(struct intel_engine_cs *engine)
{
struct intel_engine_execlists *execlists = &engine->execlists;
- struct intel_context *ce =
- to_intel_context(engine->i915->preempt_context, engine);
- unsigned int n;
+ GEM_TRACE("%s\n", engine->name);

- GEM_BUG_ON(execlists->preempt_complete_status !=
- upper_32_bits(ce->lrc_desc));
+ if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
+ /*
+ * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+ * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+ */
+ GEM_BUG_ON(execlists->ctrl_reg == NULL);

- /*
- * Switch to our empty preempt context so
- * the state of the GPU is known (idle).
- */
- GEM_TRACE("%s\n", engine->name);
- for (n = execlists_num_ports(execlists); --n; )
- write_desc(execlists, 0, n);
+ /*
+ * If we have hardware preempt-to-idle, we do not need to
+ * inject any job to the hardware. We only set a flag.
+ */
+ writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+ } else {
+ struct intel_context *ce =
+ to_intel_context(engine->i915->preempt_context, engine);
+ unsigned int n;

- write_desc(execlists, ce->lrc_desc, n);
+ GEM_BUG_ON(execlists->preempt_complete_status !=
+ upper_32_bits(ce->lrc_desc));
+ GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+ CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+ CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));

- /* we need to manually load the submit queue */
- if (execlists->ctrl_reg)
- writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+ /*
+ * Switch to our empty preempt context so
+ * the state of the GPU is known (idle).
+ */
+ for (n = execlists_num_ports(execlists); --n; )
+ write_desc(execlists, 0, n);
+
+ write_desc(execlists, ce->lrc_desc, n);
+
+ /* we need to manually load the submit queue */
+ if (execlists->ctrl_reg)
+ writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+ }

execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
@@ -583,7 +604,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
return;

if (need_preempt(engine, last, execlists->queue_priority)) {
- inject_preempt_context(engine);
+ execlist_send_preempt_to_idle(engine);
return;
}

@@ -910,22 +931,43 @@ static void process_csb(struct intel_engine_cs *engine)
execlists->active);

status = buf[2 * head];
- if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
- GEN8_CTX_STATUS_PREEMPTED))
- execlists_set_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
- if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
- execlists_clear_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
-
- if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
- continue;
+ /*
+ * Check if preempted from idle to idle directly.
+ * The STATUS_IDLE_ACTIVE flag is used to mark
+ * such transition.
+ */
+ if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+ (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {

- /* We should never get a COMPLETED | IDLE_ACTIVE! */
- GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+ /*
+ * We could not have COMPLETED anything
+ * if we were idle before preemption.
+ */
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
+ } else {
+ if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+ GEN8_CTX_STATUS_PREEMPTED))
+ execlists_set_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);
+
+ if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+ execlists_clear_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);
+
+ if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+ continue;

- if (status & GEN8_CTX_STATUS_COMPLETE &&
- buf[2*head + 1] == execlists->preempt_complete_status) {
+ /* We should never get a COMPLETED | IDLE_ACTIVE! */
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+ }
+
+ /*
+ * Check if preempted to real idle, either directly or
+ * the preemptive context already finished executing
+ */
+ if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+ (status & GEN8_CTX_STATUS_COMPLETE &&
+ buf[2*head + 1] == execlists->preempt_complete_status)) {
GEM_TRACE("%s preempt-idle\n", engine->name);
complete_preempt_context(execlists);
continue;
@@ -2138,7 +2180,8 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
engine->unpark = NULL;

engine->flags |= I915_ENGINE_SUPPORTS_STATS;
- if (engine->i915->preempt_context)
+ if (engine->i915->preempt_context ||
+ HAS_HW_PREEMPT_TO_IDLE(engine->i915))
engine->flags |= I915_ENGINE_HAS_PREEMPTION;

engine->i915->caps.scheduler =
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index f5a5502..871901a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -43,6 +43,7 @@
#define RING_EXECLIST_SQ_CONTENTS(engine) _MMIO((engine)->mmio_base + 0x510)
#define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base + 0x550)
#define EL_CTRL_LOAD (1 << 0)
+#define EL_CTRL_PREEMPT_TO_IDLE (1 << 1)

/* The docs specify that the write pointer wraps around after 5h, "After status
* is written out to the last available status QW at offset 5h, this pointer
--
2.7.4
Joonas Lahtinen
2018-10-16 10:53:41 UTC
Permalink
Quoting Tomasz Lis (2018-10-15 20:29:18)
Post by Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.
Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.
This patch does not cover using the new preemption mechanism when GuC is
active.
v2: Added needs_preempt_context() change so that it is not created when
preempt-to-idle is supported. (Chris)
Updated setting HWACK flag so that it is cleared after
preempt-to-idle. (Chris, Daniele)
Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
v3: Fixed needs_preempt_context() change. (Chris)
Merged preemption trigger functions to one. (Chris)
Fixed context state to not assume COMPLETED_MASK after preemption,
since idle-to-idle case will not have it set.
v4: Simplified needs_preempt_context() change. (Daniele)
Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)
v5: Renamed inject_preempt_context(). (Daniele)
Removed duplicated GEM_BUG_ON() on HWACK (Daniele)
Bspec: 18922
This R-b was on v4, and should be indicated with # v4 comment.

The commit message doesn't say much about why preempting to idle is
beneficial? The pre-Gen11 codepath needs to be maintained anyway.

Regards, Joonas
Lis, Tomasz
2018-10-19 16:00:15 UTC
Permalink
Post by Joonas Lahtinen
Quoting Tomasz Lis (2018-10-15 20:29:18)
Post by Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.
Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.
This patch does not cover using the new preemption mechanism when GuC is
active.
v2: Added needs_preempt_context() change so that it is not created when
preempt-to-idle is supported. (Chris)
Updated setting HWACK flag so that it is cleared after
preempt-to-idle. (Chris, Daniele)
Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
v3: Fixed needs_preempt_context() change. (Chris)
Merged preemption trigger functions to one. (Chris)
Fixed context state to not assume COMPLETED_MASK after preemption,
since idle-to-idle case will not have it set.
v4: Simplified needs_preempt_context() change. (Daniele)
Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)
v5: Renamed inject_preempt_context(). (Daniele)
Removed duplicated GEM_BUG_ON() on HWACK (Daniele)
Bspec: 18922
This R-b was on v4, and should be indicated with # v4 comment.
The commit message doesn't say much about why preempting to idle is
beneficial? The pre-Gen11 codepath needs to be maintained anyway.
Regards, Joonas
The benefit is one less context switch - there is no "preempt context".
-Tomasz
Joonas Lahtinen
2018-10-23 09:13:31 UTC
Permalink
Quoting Lis, Tomasz (2018-10-19 19:00:15)
Post by Lis, Tomasz
Post by Joonas Lahtinen
Quoting Tomasz Lis (2018-10-15 20:29:18)
Post by Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.
Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.
This patch does not cover using the new preemption mechanism when GuC is
active.
v2: Added needs_preempt_context() change so that it is not created when
preempt-to-idle is supported. (Chris)
Updated setting HWACK flag so that it is cleared after
preempt-to-idle. (Chris, Daniele)
Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
v3: Fixed needs_preempt_context() change. (Chris)
Merged preemption trigger functions to one. (Chris)
Fixed context state to not assume COMPLETED_MASK after preemption,
since idle-to-idle case will not have it set.
v4: Simplified needs_preempt_context() change. (Daniele)
Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)
v5: Renamed inject_preempt_context(). (Daniele)
Removed duplicated GEM_BUG_ON() on HWACK (Daniele)
Bspec: 18922
This R-b was on v4, and should be indicated with # v4 comment.
The commit message doesn't say much about why preempting to idle is
beneficial? The pre-Gen11 codepath needs to be maintained anyway.
Regards, Joonas
The benefit is one less context switch - there is no "preempt context".
Yes.

But that still doesn't quite explain what material benefits there are? :)

Is there some actual workloads/microbenchmarks that get an improvement?

This alters the behavior between different platforms for a very delicate
feature, probably resulting in slightly different bugs. So there should
be some more reasoning than just because we can.

Regards, Joonas
Lis, Tomasz
2018-10-23 09:24:57 UTC
Permalink
Post by Joonas Lahtinen
Quoting Lis, Tomasz (2018-10-19 19:00:15)
Post by Lis, Tomasz
Post by Joonas Lahtinen
Quoting Tomasz Lis (2018-10-15 20:29:18)
Post by Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.
Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.
This patch does not cover using the new preemption mechanism when GuC is
active.
v2: Added needs_preempt_context() change so that it is not created when
preempt-to-idle is supported. (Chris)
Updated setting HWACK flag so that it is cleared after
preempt-to-idle. (Chris, Daniele)
Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
v3: Fixed needs_preempt_context() change. (Chris)
Merged preemption trigger functions to one. (Chris)
Fixed context state to not assume COMPLETED_MASK after preemption,
since idle-to-idle case will not have it set.
v4: Simplified needs_preempt_context() change. (Daniele)
Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)
v5: Renamed inject_preempt_context(). (Daniele)
Removed duplicated GEM_BUG_ON() on HWACK (Daniele)
Bspec: 18922
This R-b was on v4, and should be indicated with # v4 comment.
The commit message doesn't say much about why preempting to idle is
beneficial? The pre-Gen11 codepath needs to be maintained anyway.
Regards, Joonas
The benefit is one less context switch - there is no "preempt context".
Yes.
But that still doesn't quite explain what material benefits there are? :)
Is there some actual workloads/microbenchmarks that get an improvement?
This alters the behavior between different platforms for a very delicate
feature, probably resulting in slightly different bugs. So there should
be some more reasoning than just because we can.
Regards, Joonas
Less context switching does imply perf improvement, though it would
require measurement - it might be hardly detectable. We may even lose
performance - without measurements, we don't know. So not a strong argument.

One more benefit I could think of is - GuC path will use
preempt-to-idle, so this would make execlists use the same path as GuC.
But that's not a strong argument as well.

I must agree - there doesn't seem to be any strong enough reason to go
with this change.
We might consider it after we have performance data.

-Tomasz
Patchwork
2018-10-15 17:44:24 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev7)
URL : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
40f273c57c71 drm/i915/icl: Preempt-to-idle support in execlists.
-:129: CHECK:COMPARISON_TO_NULL: Comparison to NULL could be written "!execlists->ctrl_reg"
#129: FILE: drivers/gpu/drm/i915/intel_lrc.c:502:
+ GEM_BUG_ON(execlists->ctrl_reg == NULL);

-:205: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#205: FILE: drivers/gpu/drm/i915/intel_lrc.c:940:
+ if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+ (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {

-:239: CHECK:SPACING: spaces preferred around that '*' (ctx:VxV)
#239: FILE: drivers/gpu/drm/i915/intel_lrc.c:970:
+ buf[2*head + 1] == execlists->preempt_complete_status)) {
^

total: 0 errors, 0 warnings, 3 checks, 187 lines checked
Patchwork
2018-10-15 17:45:20 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev7)
URL : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915/icl: Preempt-to-idle support in execlists.
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3725:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3727:16: warning: expression using sizeof(void)
Patchwork
2018-10-15 18:07:41 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev7)
URL : https://patchwork.freedesktop.org/series/40747/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4982 -> Patchwork_10461 =

== Summary - SUCCESS ==

No regressions found.

External URL: https://patchwork.freedesktop.org/api/1.0/series/40747/revisions/7/mbox/

== Known issues ==

Here are the changes found in Patchwork_10461 that come from known issues:

=== IGT changes ===

==== Issues hit ====

***@amdgpu/***@cs-compute:
fi-kbl-8809g: NOTRUN -> FAIL (fdo#108094)

***@amdgpu/***@amd-to-i915:
fi-kbl-8809g: NOTRUN -> FAIL (fdo#107341)

***@gem_exec_suspend@basic-s3:
fi-kbl-soraka: NOTRUN -> INCOMPLETE (fdo#107556, fdo#107774, fdo#107859)

***@kms_frontbuffer_tracking@basic:
fi-byt-clapper: PASS -> FAIL (fdo#103167)


==== Possible fixes ====

***@drv_getparams_basic@basic-subslice-total:
fi-snb-2520m: DMESG-WARN (fdo#103713) -> PASS +10

***@kms_pipe_crc_basic@suspend-read-crc-pipe-b:
fi-byt-clapper: FAIL (fdo#107362, fdo#103191) -> PASS
fi-icl-u2: INCOMPLETE (fdo#107713) -> PASS


fdo#103167 https://bugs.freedesktop.org/show_bug.cgi?id=103167
fdo#103191 https://bugs.freedesktop.org/show_bug.cgi?id=103191
fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
fdo#107341 https://bugs.freedesktop.org/show_bug.cgi?id=107341
fdo#107362 https://bugs.freedesktop.org/show_bug.cgi?id=107362
fdo#107556 https://bugs.freedesktop.org/show_bug.cgi?id=107556
fdo#107713 https://bugs.freedesktop.org/show_bug.cgi?id=107713
fdo#107774 https://bugs.freedesktop.org/show_bug.cgi?id=107774
fdo#107859 https://bugs.freedesktop.org/show_bug.cgi?id=107859
fdo#108094 https://bugs.freedesktop.org/show_bug.cgi?id=108094


== Participating hosts (52 -> 47) ==

Additional (2): fi-kbl-soraka fi-skl-guc
Missing (7): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-apl-guc fi-ctg-p8600 fi-kbl-7560u


== Build changes ==

* Linux: CI_DRM_4982 -> Patchwork_10461

CI_DRM_4982: 6222b112cd485ea16d06c120531becf97ee57bc7 @ git://anongit.freedesktop.org/gfx-ci/linux
IGT_4678: 9310a1265ceabeec736bdf0a76e1e0357c76c0b1 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
Patchwork_10461: 40f273c57c71aeb1957dd683859fdee8baffd13a @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

40f273c57c71 drm/i915/icl: Preempt-to-idle support in execlists.

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_10461/issues.html
Patchwork
2018-10-15 23:55:24 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev7)
URL : https://patchwork.freedesktop.org/series/40747/
State : failure

== Summary ==

= CI Bug Log - changes from CI_DRM_4982_full -> Patchwork_10461_full =

== Summary - FAILURE ==

Serious unknown changes coming with Patchwork_10461_full absolutely need to be
verified manually.

If you think the reported changes have nothing to do with the changes
introduced in Patchwork_10461_full, please notify your bug team to allow them
to document this new failure mode, which will reduce false positives in CI.



== Possible new issues ==

Here are the unknown changes that may have been introduced in Patchwork_10461_full:

=== IGT changes ===

==== Possible regressions ====

***@kms_atomic_transition@1x-modeset-transitions:
shard-skl: NOTRUN -> FAIL


==== Warnings ====

***@pm_rc6_residency@rc6-accuracy:
shard-snb: PASS -> SKIP


== Known issues ==

Here are the changes found in Patchwork_10461_full that come from known issues:

=== IGT changes ===

==== Issues hit ====

***@drv_suspend@sysfs-reader:
shard-snb: PASS -> DMESG-WARN (fdo#102365)

***@gem_exec_schedule@pi-ringfull-render:
shard-skl: NOTRUN -> FAIL (fdo#103158)

***@gem_ppgtt@blt-vs-render-ctxn:
shard-skl: NOTRUN -> TIMEOUT (fdo#108039)

***@gem_userptr_blits@readonly-unsync:
shard-skl: NOTRUN -> INCOMPLETE (fdo#108074)

***@kms_busy@extended-modeset-hang-newfb-render-a:
shard-skl: NOTRUN -> DMESG-WARN (fdo#107956) +4

***@kms_ccs@pipe-a-crc-sprite-planes-basic:
shard-skl: NOTRUN -> FAIL (fdo#105458)

***@kms_ccs@pipe-b-crc-sprite-planes-basic:
shard-skl: NOTRUN -> FAIL (fdo#107725, fdo#108145)

***@kms_color@pipe-a-legacy-gamma:
shard-skl: NOTRUN -> FAIL (fdo#104782, fdo#108145)

***@kms_cursor_crc@cursor-256x256-suspend:
shard-skl: NOTRUN -> FAIL (fdo#103191, fdo#103232)

***@kms_cursor_crc@cursor-256x85-offscreen:
shard-skl: NOTRUN -> FAIL (fdo#103232)

***@kms_draw_crc@fill-fb:
shard-skl: NOTRUN -> FAIL (fdo#103184)

***@kms_fbcon_fbt@psr:
shard-skl: NOTRUN -> FAIL (fdo#107882)

***@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-blt:
shard-apl: PASS -> FAIL (fdo#103167)

***@kms_frontbuffer_tracking@fbc-1p-rte:
shard-apl: PASS -> FAIL (fdo#103167, fdo#105682)

***@kms_frontbuffer_tracking@fbc-2p-primscrn-spr-indfb-onoff:
shard-glk: PASS -> FAIL (fdo#103167) +4

***@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-spr-indfb-fullscreen:
shard-skl: NOTRUN -> FAIL (fdo#105682)

***@kms_frontbuffer_tracking@fbcpsr-stridechange:
shard-skl: NOTRUN -> FAIL (fdo#105683)

***@kms_frontbuffer_tracking@psr-1p-primscrn-cur-indfb-draw-mmap-wc:
shard-skl: NOTRUN -> FAIL (fdo#103167) +2

***@kms_pipe_crc_basic@hang-read-crc-pipe-a:
shard-skl: NOTRUN -> FAIL (fdo#103191, fdo#107362)

***@kms_plane@pixel-format-pipe-b-planes:
shard-skl: NOTRUN -> DMESG-FAIL (fdo#103166, fdo#106885) +1

***@kms_plane@plane-position-covered-pipe-a-planes:
shard-apl: PASS -> FAIL (fdo#103166) +1

***@kms_plane_alpha_blend@pipe-a-alpha-7efc:
shard-skl: NOTRUN -> FAIL (fdo#108145) +5

***@kms_plane_alpha_blend@pipe-a-alpha-opaque-fb:
shard-apl: PASS -> FAIL (fdo#108145)

***@kms_plane_alpha_blend@pipe-c-coverage-7efc:
shard-skl: NOTRUN -> FAIL (fdo#108146)

***@kms_plane_multiple@atomic-pipe-c-tiling-x:
shard-glk: PASS -> FAIL (fdo#103166)

***@kms_rotation_crc@exhaust-fences:
shard-skl: NOTRUN -> DMESG-WARN (fdo#105748)

***@pm_backlight@fade_with_suspend:
shard-skl: NOTRUN -> FAIL (fdo#107847)


==== Possible fixes ====

***@gem_wait@busy-default:
shard-snb: INCOMPLETE (fdo#105411) -> PASS

***@kms_busy@extended-modeset-hang-newfb-render-c:
shard-kbl: DMESG-WARN (fdo#107956) -> PASS

***@kms_busy@extended-pageflip-modeset-hang-oldfb-render-c:
shard-glk: DMESG-WARN (fdo#107956) -> PASS

***@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-mmap-gtt:
shard-apl: FAIL (fdo#103167) -> PASS +1

***@kms_frontbuffer_tracking@fbc-1p-rte:
shard-glk: FAIL (fdo#103167, fdo#105682) -> PASS

***@kms_plane@plane-position-covered-pipe-a-planes:
shard-glk: FAIL (fdo#103166) -> PASS

***@kms_plane_multiple@atomic-pipe-c-tiling-yf:
shard-apl: FAIL (fdo#103166) -> PASS +1

***@perf@polling:
shard-hsw: FAIL (fdo#102252) -> PASS


fdo#102252 https://bugs.freedesktop.org/show_bug.cgi?id=102252
fdo#102365 https://bugs.freedesktop.org/show_bug.cgi?id=102365
fdo#103158 https://bugs.freedesktop.org/show_bug.cgi?id=103158
fdo#103166 https://bugs.freedesktop.org/show_bug.cgi?id=103166
fdo#103167 https://bugs.freedesktop.org/show_bug.cgi?id=103167
fdo#103184 https://bugs.freedesktop.org/show_bug.cgi?id=103184
fdo#103191 https://bugs.freedesktop.org/show_bug.cgi?id=103191
fdo#103232 https://bugs.freedesktop.org/show_bug.cgi?id=103232
fdo#104782 https://bugs.freedesktop.org/show_bug.cgi?id=104782
fdo#105411 https://bugs.freedesktop.org/show_bug.cgi?id=105411
fdo#105458 https://bugs.freedesktop.org/show_bug.cgi?id=105458
fdo#105682 https://bugs.freedesktop.org/show_bug.cgi?id=105682
fdo#105683 https://bugs.freedesktop.org/show_bug.cgi?id=105683
fdo#105748 https://bugs.freedesktop.org/show_bug.cgi?id=105748
fdo#106885 https://bugs.freedesktop.org/show_bug.cgi?id=106885
fdo#107362 https://bugs.freedesktop.org/show_bug.cgi?id=107362
fdo#107725 https://bugs.freedesktop.org/show_bug.cgi?id=107725
fdo#107847 https://bugs.freedesktop.org/show_bug.cgi?id=107847
fdo#107882 https://bugs.freedesktop.org/show_bug.cgi?id=107882
fdo#107956 https://bugs.freedesktop.org/show_bug.cgi?id=107956
fdo#108039 https://bugs.freedesktop.org/show_bug.cgi?id=108039
fdo#108074 https://bugs.freedesktop.org/show_bug.cgi?id=108074
fdo#108145 https://bugs.freedesktop.org/show_bug.cgi?id=108145
fdo#108146 https://bugs.freedesktop.org/show_bug.cgi?id=108146


== Participating hosts (6 -> 6) ==

No changes in participating hosts


== Build changes ==

* Linux: CI_DRM_4982 -> Patchwork_10461

CI_DRM_4982: 6222b112cd485ea16d06c120531becf97ee57bc7 @ git://anongit.freedesktop.org/gfx-ci/linux
IGT_4678: 9310a1265ceabeec736bdf0a76e1e0357c76c0b1 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
Patchwork_10461: 40f273c57c71aeb1957dd683859fdee8baffd13a @ git://anongit.freedesktop.org/gfx-ci/linux
piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_10461/shards.html
Tomasz Lis
2018-11-09 17:18:53 UTC
Permalink
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.

Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

The advantage of this new preemption path is that one less context switch is
needed, and the information about preemption being complete is received
earlier. This leads to significant improvement in our IGT latency test.

Test performed: `gem_exec_latency --run-subtest render-preemption`, executed
100 times, on the same platform, same kernel, without and with this patch.
Then taken average of the execution latency times:

subcase old preempt. icl preempt.
render-render 853.2036 840.1176
render-bsd 2328.8708 2083.2576
render-blt 2080.1501 1852.0792
render-vebox 1553.5134 1428.762

Improvement observed:

subcase improvement
render-render 1.53%
render-bsd 10.55%
render-blt 10.96%
render-vebox 8.03%

v2: Added needs_preempt_context() change so that it is not created when
preempt-to-idle is supported. (Chris)
Updated setting HWACK flag so that it is cleared after
preempt-to-idle. (Chris, Daniele)
Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)

v3: Fixed needs_preempt_context() change. (Chris)
Merged preemption trigger functions to one. (Chris)
Fixed context state to not assume COMPLETED_MASK after preemption,
since idle-to-idle case will not have it set.

v4: Simplified needs_preempt_context() change. (Daniele)
Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)

v5: Renamed inject_preempt_context(). (Daniele)
Removed duplicated GEM_BUG_ON() on HWACK (Daniele)

v6: Added performance test results.

Bspec: 18922
Cc: Joonas Lahtinen <***@linux.intel.com>
Cc: Chris Wilson <***@chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <***@intel.com>
Cc: Michal Winiarski <***@intel.com>
Cc: Mika Kuoppala <***@intel.com>
Reviewed-by: Daniele Ceraolo Spurio <***@intel.com>
Signed-off-by: Tomasz Lis <***@intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 2 +
drivers/gpu/drm/i915/i915_gem_context.c | 3 +-
drivers/gpu/drm/i915/i915_pci.c | 3 +-
drivers/gpu/drm/i915/intel_device_info.h | 1 +
drivers/gpu/drm/i915/intel_lrc.c | 109 +++++++++++++++++++++----------
drivers/gpu/drm/i915/intel_lrc.h | 1 +
6 files changed, 84 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 08d25aa..d2cc9f1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2579,6 +2579,8 @@ intel_info(const struct drm_i915_private *dev_priv)
((dev_priv)->info.has_logical_ring_elsq)
#define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+ ((dev_priv)->info.has_hw_preempt_to_idle)

#define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index b97963d..10b1d61 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -529,7 +529,8 @@ static void init_contexts(struct drm_i915_private *i915)

static bool needs_preempt_context(struct drm_i915_private *i915)
{
- return HAS_LOGICAL_RING_PREEMPTION(i915);
+ return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+ !HAS_HW_PREEMPT_TO_IDLE(i915);
}

int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 4ccab83..82125cf 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -600,7 +600,8 @@ static const struct intel_device_info intel_cannonlake_info = {
TRANSCODER_DSI0_OFFSET, TRANSCODER_DSI1_OFFSET}, \
GEN(11), \
.ddb_size = 2048, \
- .has_logical_ring_elsq = 1
+ .has_logical_ring_elsq = 1, \
+ .has_hw_preempt_to_idle = 1

static const struct intel_device_info intel_icelake_11_info = {
GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 86ce1db..a2ee278 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -104,6 +104,7 @@ enum intel_ppgtt {
func(has_logical_ring_contexts); \
func(has_logical_ring_elsq); \
func(has_logical_ring_preemption); \
+ func(has_hw_preempt_to_idle); \
func(has_overlay); \
func(has_pooled_eu); \
func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 08fd9b1..26b7062 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -155,6 +155,7 @@
#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
#define GEN8_CTX_STATUS_COMPLETE (1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE (1 << 29)

#define GEN8_CTX_STATUS_COMPLETED_MASK \
(GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -500,29 +501,49 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq)
port_set(port, port_pack(i915_request_get(rq), port_count(port)));
}

-static void inject_preempt_context(struct intel_engine_cs *engine)
+static void execlist_send_preempt_to_idle(struct intel_engine_cs *engine)
{
struct intel_engine_execlists *execlists = &engine->execlists;
- struct intel_context *ce =
- to_intel_context(engine->i915->preempt_context, engine);
- unsigned int n;
+ GEM_TRACE("%s\n", engine->name);

- GEM_BUG_ON(execlists->preempt_complete_status !=
- upper_32_bits(ce->lrc_desc));
+ if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
+ /*
+ * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+ * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+ */
+ GEM_BUG_ON(execlists->ctrl_reg == NULL);

- /*
- * Switch to our empty preempt context so
- * the state of the GPU is known (idle).
- */
- GEM_TRACE("%s\n", engine->name);
- for (n = execlists_num_ports(execlists); --n; )
- write_desc(execlists, 0, n);
+ /*
+ * If we have hardware preempt-to-idle, we do not need to
+ * inject any job to the hardware. We only set a flag.
+ */
+ writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+ } else {
+ struct intel_context *ce =
+ to_intel_context(engine->i915->preempt_context, engine);
+ unsigned int n;

- write_desc(execlists, ce->lrc_desc, n);
+ GEM_BUG_ON(execlists->preempt_complete_status !=
+ upper_32_bits(ce->lrc_desc));
+ GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+ CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+ CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));

- /* we need to manually load the submit queue */
- if (execlists->ctrl_reg)
- writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+ /*
+ * Switch to our empty preempt context so
+ * the state of the GPU is known (idle).
+ */
+ for (n = execlists_num_ports(execlists); --n; )
+ write_desc(execlists, 0, n);
+
+ write_desc(execlists, ce->lrc_desc, n);
+
+ /* we need to manually load the submit queue */
+ if (execlists->ctrl_reg)
+ writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+ }

execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
@@ -595,7 +616,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
return;

if (need_preempt(engine, last, execlists->queue_priority)) {
- inject_preempt_context(engine);
+ execlist_send_preempt_to_idle(engine);
return;
}

@@ -922,22 +943,43 @@ static void process_csb(struct intel_engine_cs *engine)
execlists->active);

status = buf[2 * head];
- if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
- GEN8_CTX_STATUS_PREEMPTED))
- execlists_set_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
- if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
- execlists_clear_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
-
- if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
- continue;
+ /*
+ * Check if preempted from idle to idle directly.
+ * The STATUS_IDLE_ACTIVE flag is used to mark
+ * such transition.
+ */
+ if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+ (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {

- /* We should never get a COMPLETED | IDLE_ACTIVE! */
- GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+ /*
+ * We could not have COMPLETED anything
+ * if we were idle before preemption.
+ */
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
+ } else {
+ if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+ GEN8_CTX_STATUS_PREEMPTED))
+ execlists_set_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);
+
+ if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+ execlists_clear_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);
+
+ if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+ continue;

- if (status & GEN8_CTX_STATUS_COMPLETE &&
- buf[2*head + 1] == execlists->preempt_complete_status) {
+ /* We should never get a COMPLETED | IDLE_ACTIVE! */
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+ }
+
+ /*
+ * Check if preempted to real idle, either directly or
+ * the preemptive context already finished executing
+ */
+ if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+ (status & GEN8_CTX_STATUS_COMPLETE &&
+ buf[2*head + 1] == execlists->preempt_complete_status)) {
GEM_TRACE("%s preempt-idle\n", engine->name);
complete_preempt_context(execlists);
continue;
@@ -2150,7 +2192,8 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
engine->unpark = NULL;

engine->flags |= I915_ENGINE_SUPPORTS_STATS;
- if (engine->i915->preempt_context)
+ if (engine->i915->preempt_context ||
+ HAS_HW_PREEMPT_TO_IDLE(engine->i915))
engine->flags |= I915_ENGINE_HAS_PREEMPTION;

engine->i915->caps.scheduler =
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index f5a5502..871901a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -43,6 +43,7 @@
#define RING_EXECLIST_SQ_CONTENTS(engine) _MMIO((engine)->mmio_base + 0x510)
#define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base + 0x550)
#define EL_CTRL_LOAD (1 << 0)
+#define EL_CTRL_PREEMPT_TO_IDLE (1 << 1)

/* The docs specify that the write pointer wraps around after 5h, "After status
* is written out to the last available status QW at offset 5h, this pointer
--
2.7.4
Tvrtko Ursulin
2018-12-10 15:40:34 UTC
Permalink
Post by Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.
Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.
This patch does not cover using the new preemption mechanism when GuC is
active.
The advantage of this new preemption path is that one less context switch is
needed, and the information about preemption being complete is received
earlier. This leads to significant improvement in our IGT latency test.
Test performed: `gem_exec_latency --run-subtest render-preemption`, executed
100 times, on the same platform, same kernel, without and with this patch.
subcase old preempt. icl preempt.
render-render 853.2036 840.1176
render-bsd 2328.8708 2083.2576
render-blt 2080.1501 1852.0792
render-vebox 1553.5134 1428.762
subcase improvement
render-render 1.53%
render-bsd 10.55%
render-blt 10.96%
render-vebox 8.03%
Who can explain what do the parts other than render-render mean? At
least I can make sense of render-render - measure how long it takes for
one context to preempt another, but render-$other draws a blank for me.
How are engines pre-empting one another?

But anyway, even if only the 1.53% improvement is the real one, FWIW
that's I think good enough to justify the patch. It is sufficiently
small and contained that I don't see a problem. So:

Acked-by: Tvrtko Ursulin <***@intel.com>

Regards,

Tvrtko
Post by Tomasz Lis
v2: Added needs_preempt_context() change so that it is not created when
preempt-to-idle is supported. (Chris)
Updated setting HWACK flag so that it is cleared after
preempt-to-idle. (Chris, Daniele)
Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
v3: Fixed needs_preempt_context() change. (Chris)
Merged preemption trigger functions to one. (Chris)
Fixed context state to not assume COMPLETED_MASK after preemption,
since idle-to-idle case will not have it set.
v4: Simplified needs_preempt_context() change. (Daniele)
Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)
v5: Renamed inject_preempt_context(). (Daniele)
Removed duplicated GEM_BUG_ON() on HWACK (Daniele)
v6: Added performance test results.
Bspec: 18922
---
drivers/gpu/drm/i915/i915_drv.h | 2 +
drivers/gpu/drm/i915/i915_gem_context.c | 3 +-
drivers/gpu/drm/i915/i915_pci.c | 3 +-
drivers/gpu/drm/i915/intel_device_info.h | 1 +
drivers/gpu/drm/i915/intel_lrc.c | 109 +++++++++++++++++++++----------
drivers/gpu/drm/i915/intel_lrc.h | 1 +
6 files changed, 84 insertions(+), 35 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 08d25aa..d2cc9f1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2579,6 +2579,8 @@ intel_info(const struct drm_i915_private *dev_priv)
((dev_priv)->info.has_logical_ring_elsq)
#define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+ ((dev_priv)->info.has_hw_preempt_to_idle)
#define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index b97963d..10b1d61 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -529,7 +529,8 @@ static void init_contexts(struct drm_i915_private *i915)
static bool needs_preempt_context(struct drm_i915_private *i915)
{
- return HAS_LOGICAL_RING_PREEMPTION(i915);
+ return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+ !HAS_HW_PREEMPT_TO_IDLE(i915);
}
int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 4ccab83..82125cf 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -600,7 +600,8 @@ static const struct intel_device_info intel_cannonlake_info = {
TRANSCODER_DSI0_OFFSET, TRANSCODER_DSI1_OFFSET}, \
GEN(11), \
.ddb_size = 2048, \
- .has_logical_ring_elsq = 1
+ .has_logical_ring_elsq = 1, \
+ .has_hw_preempt_to_idle = 1
static const struct intel_device_info intel_icelake_11_info = {
GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 86ce1db..a2ee278 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -104,6 +104,7 @@ enum intel_ppgtt {
func(has_logical_ring_contexts); \
func(has_logical_ring_elsq); \
func(has_logical_ring_preemption); \
+ func(has_hw_preempt_to_idle); \
func(has_overlay); \
func(has_pooled_eu); \
func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 08fd9b1..26b7062 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -155,6 +155,7 @@
#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
#define GEN8_CTX_STATUS_COMPLETE (1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE (1 << 29)
#define GEN8_CTX_STATUS_COMPLETED_MASK \
(GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -500,29 +501,49 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq)
port_set(port, port_pack(i915_request_get(rq), port_count(port)));
}
-static void inject_preempt_context(struct intel_engine_cs *engine)
+static void execlist_send_preempt_to_idle(struct intel_engine_cs *engine)
{
struct intel_engine_execlists *execlists = &engine->execlists;
- struct intel_context *ce =
- to_intel_context(engine->i915->preempt_context, engine);
- unsigned int n;
+ GEM_TRACE("%s\n", engine->name);
- GEM_BUG_ON(execlists->preempt_complete_status !=
- upper_32_bits(ce->lrc_desc));
+ if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
+ /*
+ * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+ * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+ */
+ GEM_BUG_ON(execlists->ctrl_reg == NULL);
- /*
- * Switch to our empty preempt context so
- * the state of the GPU is known (idle).
- */
- GEM_TRACE("%s\n", engine->name);
- for (n = execlists_num_ports(execlists); --n; )
- write_desc(execlists, 0, n);
+ /*
+ * If we have hardware preempt-to-idle, we do not need to
+ * inject any job to the hardware. We only set a flag.
+ */
+ writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+ } else {
+ struct intel_context *ce =
+ to_intel_context(engine->i915->preempt_context, engine);
+ unsigned int n;
- write_desc(execlists, ce->lrc_desc, n);
+ GEM_BUG_ON(execlists->preempt_complete_status !=
+ upper_32_bits(ce->lrc_desc));
+ GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+ CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+ CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
- /* we need to manually load the submit queue */
- if (execlists->ctrl_reg)
- writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+ /*
+ * Switch to our empty preempt context so
+ * the state of the GPU is known (idle).
+ */
+ for (n = execlists_num_ports(execlists); --n; )
+ write_desc(execlists, 0, n);
+
+ write_desc(execlists, ce->lrc_desc, n);
+
+ /* we need to manually load the submit queue */
+ if (execlists->ctrl_reg)
+ writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+ }
execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
@@ -595,7 +616,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
return;
if (need_preempt(engine, last, execlists->queue_priority)) {
- inject_preempt_context(engine);
+ execlist_send_preempt_to_idle(engine);
return;
}
@@ -922,22 +943,43 @@ static void process_csb(struct intel_engine_cs *engine)
execlists->active);
status = buf[2 * head];
- if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
- GEN8_CTX_STATUS_PREEMPTED))
- execlists_set_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
- if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
- execlists_clear_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
-
- if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
- continue;
+ /*
+ * Check if preempted from idle to idle directly.
+ * The STATUS_IDLE_ACTIVE flag is used to mark
+ * such transition.
+ */
+ if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+ (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
- /* We should never get a COMPLETED | IDLE_ACTIVE! */
- GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+ /*
+ * We could not have COMPLETED anything
+ * if we were idle before preemption.
+ */
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
+ } else {
+ if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+ GEN8_CTX_STATUS_PREEMPTED))
+ execlists_set_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);
+
+ if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+ execlists_clear_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);
+
+ if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+ continue;
- if (status & GEN8_CTX_STATUS_COMPLETE &&
- buf[2*head + 1] == execlists->preempt_complete_status) {
+ /* We should never get a COMPLETED | IDLE_ACTIVE! */
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+ }
+
+ /*
+ * Check if preempted to real idle, either directly or
+ * the preemptive context already finished executing
+ */
+ if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+ (status & GEN8_CTX_STATUS_COMPLETE &&
+ buf[2*head + 1] == execlists->preempt_complete_status)) {
GEM_TRACE("%s preempt-idle\n", engine->name);
complete_preempt_context(execlists);
continue;
@@ -2150,7 +2192,8 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
engine->unpark = NULL;
engine->flags |= I915_ENGINE_SUPPORTS_STATS;
- if (engine->i915->preempt_context)
+ if (engine->i915->preempt_context ||
+ HAS_HW_PREEMPT_TO_IDLE(engine->i915))
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
engine->i915->caps.scheduler =
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index f5a5502..871901a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -43,6 +43,7 @@
#define RING_EXECLIST_SQ_CONTENTS(engine) _MMIO((engine)->mmio_base + 0x510)
#define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base + 0x550)
#define EL_CTRL_LOAD (1 << 0)
+#define EL_CTRL_PREEMPT_TO_IDLE (1 << 1)
/* The docs specify that the write pointer wraps around after 5h, "After status
* is written out to the last available status QW at offset 5h, this pointer
Patchwork
2018-11-09 18:17:44 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev8)
URL : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
565cd7090a44 drm/i915/icl: Preempt-to-idle support in execlists.
-:18: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#18:
The advantage of this new preemption path is that one less context switch is

-:153: CHECK:COMPARISON_TO_NULL: Comparison to NULL could be written "!execlists->ctrl_reg"
#153: FILE: drivers/gpu/drm/i915/intel_lrc.c:514:
+ GEM_BUG_ON(execlists->ctrl_reg == NULL);

-:229: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#229: FILE: drivers/gpu/drm/i915/intel_lrc.c:952:
+ if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+ (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {

-:263: CHECK:SPACING: spaces preferred around that '*' (ctx:VxV)
#263: FILE: drivers/gpu/drm/i915/intel_lrc.c:982:
+ buf[2*head + 1] == execlists->preempt_complete_status)) {
^

total: 0 errors, 1 warnings, 3 checks, 187 lines checked
Patchwork
2018-11-09 18:18:39 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev8)
URL : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915/icl: Preempt-to-idle support in execlists.
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3714:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3716:16: warning: expression using sizeof(void)
Patchwork
2018-11-09 18:33:49 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev8)
URL : https://patchwork.freedesktop.org/series/40747/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_5116 -> Patchwork_10797 =

== Summary - SUCCESS ==

No regressions found.

External URL: https://patchwork.freedesktop.org/api/1.0/series/40747/revisions/8/mbox/

== Known issues ==

Here are the changes found in Patchwork_10797 that come from known issues:

=== IGT changes ===

==== Issues hit ====

igt@gem_ctx_create@basic-files:
    fi-icl-u2:          PASS -> DMESG-WARN (fdo#107724)

igt@kms_pipe_crc_basic@nonblocking-crc-pipe-a-frame-sequence:
    fi-byt-clapper:     PASS -> FAIL (fdo#107362, fdo#103191)

igt@kms_pipe_crc_basic@read-crc-pipe-b:
    fi-byt-clapper:     PASS -> FAIL (fdo#107362)


==== Possible fixes ====

igt@drv_selftest@live_hugepages:
    fi-skl-6700k2:      INCOMPLETE -> PASS

igt@kms_chamelium@common-hpd-after-suspend:
    fi-skl-6700k2:      TIMEOUT -> PASS

igt@kms_flip@basic-plain-flip:
    fi-ilk-650:         DMESG-WARN (fdo#106387) -> PASS

igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
    fi-byt-clapper:     FAIL (fdo#107362, fdo#103191) -> PASS


fdo#103191 https://bugs.freedesktop.org/show_bug.cgi?id=103191
fdo#106387 https://bugs.freedesktop.org/show_bug.cgi?id=106387
fdo#107362 https://bugs.freedesktop.org/show_bug.cgi?id=107362
fdo#107724 https://bugs.freedesktop.org/show_bug.cgi?id=107724


== Participating hosts (51 -> 46) ==

Additional (1): fi-glk-j4005
Missing (6): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-icl-u


== Build changes ==

* Linux: CI_DRM_5116 -> Patchwork_10797

CI_DRM_5116: ade66f7f60026c1c7e68a12ce07d5d4000afce13 @ git://anongit.freedesktop.org/gfx-ci/linux
IGT_4714: cab148ca3ec904a94d0cd43476cf7e1f8663f906 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
Patchwork_10797: 565cd7090a442f92340477f4bfe8c55f7590344f @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

565cd7090a44 drm/i915/icl: Preempt-to-idle support in execlists.

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_10797/issues.html
Patchwork
2018-11-10 03:29:47 UTC
Permalink
== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev8)
URL : https://patchwork.freedesktop.org/series/40747/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_5116_full -> Patchwork_10797_full =

== Summary - WARNING ==

Minor unknown changes coming with Patchwork_10797_full need to be verified
manually.

If you think the reported changes have nothing to do with the changes
introduced in Patchwork_10797_full, please notify your bug team to allow them
to document this new failure mode, which will reduce false positives in CI.



== Possible new issues ==

Here are the unknown changes that may have been introduced in Patchwork_10797_full:

=== IGT changes ===

==== Warnings ====

igt@kms_atomic_interruptible@universal-setplane-cursor:
    shard-snb:          SKIP -> PASS +2

igt@perf_pmu@rc6:
    shard-kbl:          SKIP -> PASS


== Known issues ==

Here are the changes found in Patchwork_10797_full that come from known issues:

=== IGT changes ===

==== Issues hit ====

igt@drv_suspend@shrink:
    shard-skl:          PASS -> INCOMPLETE (fdo#106886)

igt@gem_exec_schedule@pi-ringfull-blt:
    shard-skl:          NOTRUN -> FAIL (fdo#103158)

igt@kms_available_modes_crc@available_mode_test_crc:
    shard-snb:          NOTRUN -> FAIL (fdo#106641)

igt@kms_busy@extended-modeset-hang-newfb-render-a:
    shard-snb:          NOTRUN -> DMESG-WARN (fdo#107956)

igt@kms_busy@extended-modeset-hang-newfb-render-c:
    shard-skl:          NOTRUN -> DMESG-WARN (fdo#107956) +2

igt@kms_busy@extended-modeset-hang-newfb-with-reset-render-c:
    shard-kbl:          NOTRUN -> DMESG-WARN (fdo#107956) +1

igt@kms_busy@extended-pageflip-modeset-hang-oldfb-render-c:
    shard-glk:          NOTRUN -> DMESG-WARN (fdo#107956)

igt@kms_color@pipe-c-degamma:
    shard-apl:          PASS -> FAIL (fdo#104782)

igt@kms_cursor_crc@cursor-128x128-sliding:
    shard-apl:          PASS -> FAIL (fdo#103232)

igt@kms_cursor_crc@cursor-256x256-onscreen:
    shard-skl:          PASS -> FAIL (fdo#103232)

igt@kms_cursor_crc@cursor-256x256-suspend:
    shard-skl:          PASS -> INCOMPLETE (fdo#104108)

igt@kms_cursor_crc@cursor-64x21-sliding:
    shard-skl:          NOTRUN -> FAIL (fdo#103232) +2

igt@kms_draw_crc@draw-method-xrgb2101010-mmap-gtt-xtiled:
    shard-skl:          PASS -> FAIL (fdo#103184)

igt@kms_frontbuffer_tracking@fbc-2p-primscrn-spr-indfb-draw-mmap-wc:
    shard-glk:          PASS -> FAIL (fdo#103167) +3

igt@kms_frontbuffer_tracking@fbcpsr-1p-rte:
    shard-skl:          PASS -> FAIL (fdo#105682)

igt@kms_frontbuffer_tracking@psr-1p-primscrn-indfb-msflip-blt:
    shard-skl:          PASS -> FAIL (fdo#103167) +1

igt@kms_plane@pixel-format-pipe-b-planes:
    shard-skl:          NOTRUN -> DMESG-WARN (fdo#106885) +1

igt@kms_plane_alpha_blend@pipe-a-coverage-7efc:
    shard-skl:          PASS -> FAIL (fdo#108145, fdo#107815)

igt@kms_plane_alpha_blend@pipe-b-alpha-basic:
    shard-skl:          NOTRUN -> FAIL (fdo#108145, fdo#107815) +1

igt@kms_plane_alpha_blend@pipe-b-alpha-opaque-fb:
    shard-glk:          NOTRUN -> FAIL (fdo#108145)

igt@kms_plane_alpha_blend@pipe-c-coverage-7efc:
    shard-skl:          PASS -> FAIL (fdo#107815) +1

igt@kms_plane_multiple@atomic-pipe-b-tiling-y:
    shard-glk:          PASS -> FAIL (fdo#103166) +1
    shard-apl:          PASS -> FAIL (fdo#103166)

igt@kms_setmode@basic:
    shard-snb:          NOTRUN -> FAIL (fdo#99912)


==== Possible fixes ====

igt@drv_suspend@forcewake:
    shard-kbl:          INCOMPLETE (fdo#103665) -> PASS

igt@gem_cpu_reloc@full:
    shard-skl:          INCOMPLETE (fdo#108073) -> PASS

igt@gem_ppgtt@blt-vs-render-ctx0:
    shard-skl:          TIMEOUT (fdo#108039) -> PASS
    shard-kbl:          INCOMPLETE (fdo#106023, fdo#103665, fdo#106887) -> PASS

igt@kms_busy@extended-modeset-hang-newfb-render-b:
    shard-snb:          DMESG-WARN (fdo#107956) -> PASS

igt@kms_cursor_crc@cursor-256x256-sliding:
    shard-apl:          FAIL (fdo#103232) -> PASS

igt@kms_cursor_crc@cursor-256x256-suspend:
    shard-apl:          FAIL (fdo#103232, fdo#103191) -> PASS

igt@kms_cursor_crc@cursor-64x64-suspend:
    shard-skl:          INCOMPLETE (fdo#104108) -> PASS +1

igt@kms_flip@flip-vs-expired-vblank-interruptible:
    shard-glk:          FAIL (fdo#105363) -> PASS

igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-mmap-gtt:
    shard-apl:          FAIL (fdo#103167) -> PASS +1

igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-move:
    shard-glk:          FAIL (fdo#103167) -> PASS +2

igt@kms_plane_alpha_blend@pipe-a-constant-alpha-max:
    shard-glk:          FAIL (fdo#108145) -> PASS

igt@perf@blocking:
    shard-hsw:          FAIL (fdo#102252) -> PASS

igt@pm_rpm@gem-execbuf:
    shard-skl:          INCOMPLETE (fdo#107807, fdo#107803) -> PASS

igt@pm_rpm@modeset-lpsp:
    shard-skl:          INCOMPLETE (fdo#107807) -> PASS +1


fdo#102252 https://bugs.freedesktop.org/show_bug.cgi?id=102252
fdo#103158 https://bugs.freedesktop.org/show_bug.cgi?id=103158
fdo#103166 https://bugs.freedesktop.org/show_bug.cgi?id=103166
fdo#103167 https://bugs.freedesktop.org/show_bug.cgi?id=103167
fdo#103184 https://bugs.freedesktop.org/show_bug.cgi?id=103184
fdo#103191 https://bugs.freedesktop.org/show_bug.cgi?id=103191
fdo#103232 https://bugs.freedesktop.org/show_bug.cgi?id=103232
fdo#103665 https://bugs.freedesktop.org/show_bug.cgi?id=103665
fdo#104108 https://bugs.freedesktop.org/show_bug.cgi?id=104108
fdo#104782 https://bugs.freedesktop.org/show_bug.cgi?id=104782
fdo#105363 https://bugs.freedesktop.org/show_bug.cgi?id=105363
fdo#105682 https://bugs.freedesktop.org/show_bug.cgi?id=105682
fdo#106023 https://bugs.freedesktop.org/show_bug.cgi?id=106023
fdo#106641 https://bugs.freedesktop.org/show_bug.cgi?id=106641
fdo#106885 https://bugs.freedesktop.org/show_bug.cgi?id=106885
fdo#106886 https://bugs.freedesktop.org/show_bug.cgi?id=106886
fdo#106887 https://bugs.freedesktop.org/show_bug.cgi?id=106887
fdo#107803 https://bugs.freedesktop.org/show_bug.cgi?id=107803
fdo#107807 https://bugs.freedesktop.org/show_bug.cgi?id=107807
fdo#107815 https://bugs.freedesktop.org/show_bug.cgi?id=107815
fdo#107956 https://bugs.freedesktop.org/show_bug.cgi?id=107956
fdo#108039 https://bugs.freedesktop.org/show_bug.cgi?id=108039
fdo#108073 https://bugs.freedesktop.org/show_bug.cgi?id=108073
fdo#108145 https://bugs.freedesktop.org/show_bug.cgi?id=108145
fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912


== Participating hosts (6 -> 6) ==

No changes in participating hosts


== Build changes ==

* Linux: CI_DRM_5116 -> Patchwork_10797

CI_DRM_5116: ade66f7f60026c1c7e68a12ce07d5d4000afce13 @ git://anongit.freedesktop.org/gfx-ci/linux
IGT_4714: cab148ca3ec904a94d0cd43476cf7e1f8663f906 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
Patchwork_10797: 565cd7090a442f92340477f4bfe8c55f7590344f @ git://anongit.freedesktop.org/gfx-ci/linux
piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_10797/shards.html
Loading...