Discussion:
[Intel-gfx] [PATCH v3 3/4] drm/i915: set optimum eu/slice/sub-slice configuration based on load type
Ankit Navik
2018-12-11 09:40:32 UTC
From: Praveen Diwakar <***@intel.com>

This patch selects the optimum EU/slice/sub-slice configuration based on
the type of load (low, medium, high) given as input.
Based on our measurements and experiments we have a predefined set of
optimum configurations for each platform (CHT, KBL).
i915_gem_context_set_load_type will select the optimum configuration from
the predefined configuration table (opt_config).

It also introduces the flag update_render_config, which can be set by any
governor.
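
For illustration, a minimal user-space sketch of the table lookup this patch
performs (the values mirror the KBL GT2 table in the diff below; the types
and names are simplified stand-ins, not the kernel code):

  #include <stdio.h>
  #include <stdint.h>

  enum gem_load_type { LOAD_TYPE_LOW, LOAD_TYPE_MEDIUM, LOAD_TYPE_HIGH,
                       LOAD_TYPE_LAST };

  /* {slice, subslice, eu} per load type, indexed by enum gem_load_type */
  struct sseu_config { uint8_t slice, subslice, eu; };

  static const struct sseu_config kbl_gt2[LOAD_TYPE_LAST] = {
          { 1, 3, 2 },    /* low */
          { 1, 3, 4 },    /* medium */
          { 1, 3, 8 },    /* high */
  };

  int main(void)
  {
          const struct sseu_config *c = &kbl_gt2[LOAD_TYPE_MEDIUM];

          printf("slices=%u subslices=%u eu/subslice=%u\n",
                 (unsigned)c->slice, (unsigned)c->subslice, (unsigned)c->eu);
          return 0;
  }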

v2:
* Move static optimum_config to device init time.
* Rename function to appropriate name, fix data types and patch ordering.
* Rename prev_load_type to pending_load_type. (Tvrtko Ursulin)

v3:
* Add a safeguard check in i915_gem_context_set_load_type.
* Rename struct from optimum_config to i915_sseu_optimum_config to
avoid namespace clashes.
* Reduce memcpy for space efficiency.
* Rebase.
* Improved commit message. (Tvrtko Ursulin)

Cc: Kedar J Karanje <***@intel.com>
Cc: Yogesh Marathe <***@intel.com>
Reviewed-by: Tvrtko Ursulin <***@linux.intel.com>
Signed-off-by: Praveen Diwakar <***@intel.com>
Signed-off-by: Aravindan Muthukumar <***@intel.com>
Signed-off-by: Ankit Navik <***@intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 3 ++
drivers/gpu/drm/i915/i915_gem_context.c | 18 ++++++++++++
drivers/gpu/drm/i915/i915_gem_context.h | 25 +++++++++++++++++
drivers/gpu/drm/i915/intel_device_info.c | 47 ++++++++++++++++++++++++++++++--
drivers/gpu/drm/i915/intel_lrc.c | 4 ++-
5 files changed, 94 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4aca534..4b9a8c5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1681,6 +1681,9 @@ struct drm_i915_private {
struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; /* assume 965 */
int num_fence_regs; /* 8 on pre-965, 16 otherwise */

+ /* optimal slice/subslice/EU configuration state */
+ struct i915_sseu_optimum_config opt_config[LOAD_TYPE_LAST];
+
unsigned int fsb_freq, mem_freq, is_ddr3;
unsigned int skl_preferred_vco_freq;
unsigned int max_cdclk_freq;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index d040858..c0ced72 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -392,10 +392,28 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
ctx->subslice_cnt = hweight8(
INTEL_INFO(dev_priv)->sseu.subslice_mask[0]);
ctx->eu_cnt = INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
+ ctx->load_type = LOAD_TYPE_LOW;
+ ctx->pending_load_type = LOAD_TYPE_LOW;

return ctx;
}

+
+void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
+ enum gem_load_type type)
+{
+ struct drm_i915_private *dev_priv = ctx->i915;
+
+ if (GEM_WARN_ON(type >= LOAD_TYPE_LAST))
+ return;
+
+ /* Look up the optimum EU/slice/subslice configuration for this load type */
+ ctx->slice_cnt = dev_priv->opt_config[type].slice;
+ ctx->subslice_cnt = dev_priv->opt_config[type].subslice;
+ ctx->eu_cnt = dev_priv->opt_config[type].eu;
+ ctx->pending_load_type = type;
+}
+
/**
* i915_gem_context_create_gvt - create a GVT GEM context
* @dev: drm device *
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index e000530..a0db13c 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -53,6 +53,19 @@ struct intel_context_ops {
void (*destroy)(struct intel_context *ce);
};

+enum gem_load_type {
+ LOAD_TYPE_LOW,
+ LOAD_TYPE_MEDIUM,
+ LOAD_TYPE_HIGH,
+ LOAD_TYPE_LAST
+};
+
+struct i915_sseu_optimum_config {
+ u8 slice;
+ u8 subslice;
+ u8 eu;
+};
+
/**
* struct i915_gem_context - client state
*
@@ -208,6 +221,16 @@ struct i915_gem_context {

/** eu_cnt: used to set the # of eu to be enabled. */
u8 eu_cnt;
+
+ /** load_type: the load type (high/medium/low) the GPU is currently
+ * configured for, derived from the pending commands in the queue.
+ */
+ enum gem_load_type load_type;
+
+ /** pending_load_type: the most recently requested load type, applied
+ * to the GPU on the next context update (high/medium/low).
+ */
+ enum gem_load_type pending_load_type;
};

static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
@@ -336,6 +359,8 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
+void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
+ enum gem_load_type type);

struct i915_gem_context *
i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio);
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index 0ef0c64..33c6310 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -741,6 +741,28 @@ void intel_device_info_runtime_init(struct intel_device_info *info)
container_of(info, struct drm_i915_private, info);
enum pipe pipe;

+ struct i915_sseu_optimum_config *opt_config = NULL;
+ /* static table of slice/subslice/EU for Cherryview */
+ struct i915_sseu_optimum_config chv_config[LOAD_TYPE_LAST] = {
+ {1, 1, 4}, /* Low */
+ {1, 1, 6}, /* Medium */
+ {1, 2, 6} /* High */
+ };
+
+ /* static table of slice/subslice/EU for KBL GT2 */
+ struct i915_sseu_optimum_config kbl_gt2_config[LOAD_TYPE_LAST] = {
+ {1, 3, 2}, /* Low */
+ {1, 3, 4}, /* Medium */
+ {1, 3, 8} /* High */
+ };
+
+ /* static table of slice/subslice/EU for KBL GT3 */
+ struct i915_sseu_optimum_config kbl_gt3_config[LOAD_TYPE_LAST] = {
+ {2, 3, 4}, /* Low */
+ {2, 3, 6}, /* Medium */
+ {2, 3, 8} /* High */
+ };
+
if (INTEL_GEN(dev_priv) >= 10) {
for_each_pipe(dev_priv, pipe)
info->num_scalers[pipe] = 2;
@@ -840,17 +862,38 @@ void intel_device_info_runtime_init(struct intel_device_info *info)
/* Initialize slice/subslice/EU info */
if (IS_HASWELL(dev_priv))
haswell_sseu_info_init(dev_priv);
- else if (IS_CHERRYVIEW(dev_priv))
+ else if (IS_CHERRYVIEW(dev_priv)) {
cherryview_sseu_info_init(dev_priv);
+ opt_config = chv_config;
+ }
else if (IS_BROADWELL(dev_priv))
broadwell_sseu_info_init(dev_priv);
- else if (INTEL_GEN(dev_priv) == 9)
+ else if (INTEL_GEN(dev_priv) == 9) {
gen9_sseu_info_init(dev_priv);
+
+ if (IS_KABYLAKE(dev_priv)) {
+ switch (info->gt) {
+ default:
+ MISSING_CASE(info->gt);
+ /* fall through */
+ case 2:
+ opt_config = kbl_gt2_config;
+ break;
+ case 3:
+ opt_config = kbl_gt3_config;
+ break;
+ }
+ }
+ }
else if (INTEL_GEN(dev_priv) == 10)
gen10_sseu_info_init(dev_priv);
else if (INTEL_GEN(dev_priv) >= 11)
gen11_sseu_info_init(dev_priv);

+ if (opt_config)
+ memcpy(dev_priv->opt_config, opt_config, LOAD_TYPE_LAST *
+ sizeof(struct i915_sseu_optimum_config));
+
/* Initialize command stream timestamp frequency */
info->cs_timestamp_frequency_khz = read_timestamp_frequency(dev_priv);
}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a17f676..7fb9cd2 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -427,11 +427,13 @@ static u64 execlists_update_context(struct i915_request *rq)

reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
/* FIXME: To avoid stale rpcs config, move it to context_pin */
- if (ctx->pid && ctx->name && (rq->engine->id == RCS)) {
+ if (ctx->pid && ctx->name && (rq->engine->id == RCS) &&
+ (ctx->load_type != ctx->pending_load_type)) {
rpcs_config = make_rpcs(ctx->i915);
reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
rpcs_config);
+ ctx->load_type = ctx->pending_load_type;
}

/* True 32b PPGTT with dynamic page allocation: update PDP
--
2.7.4
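
As an aside on the intel_lrc.c hunk above: a standalone sketch of the
load_type/pending_load_type handshake it implements (a simplified user-space
mock; the names and helpers are stand-ins, not the kernel code). A governor
records the requested type, and the submission path reprograms the power
clock state only when that differs from what is currently applied:

  #include <stdbool.h>
  #include <stdio.h>

  enum gem_load_type { LOAD_TYPE_LOW, LOAD_TYPE_MEDIUM, LOAD_TYPE_HIGH,
                       LOAD_TYPE_LAST };

  struct ctx {
          enum gem_load_type load_type;         /* currently applied */
          enum gem_load_type pending_load_type; /* requested by governor */
  };

  static void set_load_type(struct ctx *c, enum gem_load_type t)
  {
          c->pending_load_type = t; /* deferred until the next update */
  }

  /* Returns true when the config had to be reprogrammed. */
  static bool update_context(struct ctx *c)
  {
          if (c->load_type == c->pending_load_type)
                  return false; /* unchanged, skip the register write */
          /* the kernel patch emits an LRI of R_PWR_CLK_STATE here */
          c->load_type = c->pending_load_type;
          return true;
  }

  int main(void)
  {
          struct ctx c = { LOAD_TYPE_LOW, LOAD_TYPE_LOW };

          set_load_type(&c, LOAD_TYPE_HIGH);
          printf("reprogrammed: %d\n", update_context(&c)); /* 1 */
          printf("reprogrammed: %d\n", update_context(&c)); /* 0 */
          return 0;
  }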
Ankit Navik
2018-12-11 09:40:33 UTC
From: Praveen Diwakar <***@intel.com>

A high-resolution timer is used by the predictive governor to control the
EU/slice/subslice configuration based on workload.

A debugfs interface is provided to enable, disable and update the timer
configuration.
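
A standalone sketch of the classification rule the governor applies on each
timer tick (the threshold and the skip-on-zero behaviour match the patch;
the harness around them is a simplified stand-in):

  #include <stdio.h>

  enum gem_load_type { LOAD_TYPE_LOW, LOAD_TYPE_MEDIUM, LOAD_TYPE_HIGH,
                       LOAD_TYPE_LAST };

  #define PENDING_THRESHOLD_MEDIUM 3

  /* LOAD_TYPE_LAST here means "leave the previous state untouched",
   * which avoids oscillation when nothing is pending. */
  static enum gem_load_type classify(unsigned long req_pending)
  {
          if (req_pending == 0)
                  return LOAD_TYPE_LAST;
          if (req_pending > PENDING_THRESHOLD_MEDIUM)
                  return LOAD_TYPE_HIGH;
          if (req_pending == PENDING_THRESHOLD_MEDIUM)
                  return LOAD_TYPE_MEDIUM;
          return LOAD_TYPE_LOW;
  }

  int main(void)
  {
          for (unsigned long n = 0; n <= 5; n++)
                  printf("pending=%lu -> type=%d\n", n, (int)classify(n));
          return 0;
  }

With the debugfs control added below, writing a nonzero timer period in
milliseconds arms the governor and writing zero disables it, e.g. (path
assumed, with debugfs mounted at /sys/kernel/debug):
echo 10 > /sys/kernel/debug/dri/0/i915_predictive_load_ctl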

V2:
* Fix code style.
* Move predictive_load_timer into a drm_i915_private
structure.
* Make generic function to set optimum config. (Tvrtko Ursulin)

V3:
* Rebase.
* Fix race condition for predictive load set.
* Start the hrtimer with slack for better power efficiency. (Tvrtko Ursulin)

Cc: Aravindan Muthukumar <***@intel.com>
Cc: Yogesh Marathe <***@intel.com>
Reviewed-by: Tvrtko Ursulin <***@linux.intel.com>
Signed-off-by: Praveen Diwakar <***@intel.com>
Signed-off-by: Kedar J Karanje <***@intel.com>
Signed-off-by: Ankit Navik <***@intel.com>
---
drivers/gpu/drm/i915/i915_debugfs.c | 90 ++++++++++++++++++++++++++++++++++++-
drivers/gpu/drm/i915/i915_drv.c | 4 ++
drivers/gpu/drm/i915/i915_drv.h | 6 +++
3 files changed, 99 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index f9ce35d..861f3c1 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4740,6 +4740,92 @@ static const struct drm_info_list i915_debugfs_list[] = {
{"i915_drrs_status", i915_drrs_status, 0},
{"i915_rps_boost_info", i915_rps_boost_info, 0},
};
+
+#define PENDING_REQ_0 0 /* No active request pending */
+
+/*
+ * Anything above the threshold is considered HIGH load, anything below
+ * it LOW load, and anything equal to it MEDIUM load.
+ *
+ * The threshold is three pending active requests.
+ */
+#define PENDING_THRESHOLD_MEDIUM 3
+
+#define SLACK_TIMER_NSEC 1000000 /* Timer slack range in nanoseconds */
+
+enum hrtimer_restart predictive_load_cb(struct hrtimer *hrtimer)
+{
+ struct drm_i915_private *dev_priv =
+ container_of(hrtimer, typeof(*dev_priv), pred_timer);
+ struct i915_gem_context *ctx;
+ enum gem_load_type load_type;
+ unsigned long req_pending;
+
+ list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
+
+ /* Skip invalid contexts */
+ if (!ctx->name)
+ continue;
+
+ req_pending = atomic_read(&ctx->req_cnt);
+
+ /*
+ * Transitioning to the low state whenever the pending request count
+ * is zero would cause oscillation between the low and high states.
+ */
+ if (req_pending == PENDING_REQ_0)
+ continue;
+
+ if (req_pending > PENDING_THRESHOLD_MEDIUM)
+ load_type = LOAD_TYPE_HIGH;
+ else if (req_pending == PENDING_THRESHOLD_MEDIUM)
+ load_type = LOAD_TYPE_MEDIUM;
+ else
+ load_type = LOAD_TYPE_LOW;
+
+ i915_gem_context_set_load_type(ctx, load_type);
+ }
+
+ hrtimer_forward_now(hrtimer,
+ ms_to_ktime(dev_priv->predictive_load_enable));
+
+ return HRTIMER_RESTART;
+}
+
+static int i915_predictive_load_get(void *data, u64 *val)
+{
+ struct drm_i915_private *dev_priv = data;
+
+ *val = dev_priv->predictive_load_enable;
+ return 0;
+}
+
+static int i915_predictive_load_set(void *data, u64 val)
+{
+ struct drm_i915_private *dev_priv = data;
+
+ mutex_lock(&dev_priv->pred_mutex);
+
+ dev_priv->predictive_load_enable = val;
+
+ if (dev_priv->predictive_load_enable) {
+ if (!hrtimer_active(&dev_priv->pred_timer))
+ hrtimer_start_range_ns(&dev_priv->pred_timer,
+ ms_to_ktime(dev_priv->predictive_load_enable),
+ SLACK_TIMER_NSEC,
+ HRTIMER_MODE_REL_PINNED);
+ } else {
+ hrtimer_cancel(&dev_priv->pred_timer);
+ }
+
+ mutex_unlock(&dev_priv->pred_mutex);
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(i915_predictive_load_ctl,
+ i915_predictive_load_get, i915_predictive_load_set,
+ "%llu\n");
+
#define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)

static const struct i915_debugfs_files {
@@ -4769,7 +4855,9 @@ static const struct i915_debugfs_files {
{"i915_hpd_storm_ctl", &i915_hpd_storm_ctl_fops},
{"i915_ipc_status", &i915_ipc_status_fops},
{"i915_drrs_ctl", &i915_drrs_ctl_fops},
- {"i915_edp_psr_debug", &i915_edp_psr_debug_fops}
+ {"i915_edp_psr_debug", &i915_edp_psr_debug_fops},
+ /* FIXME: once the feature becomes real, move this to sysfs */
+ {"i915_predictive_load_ctl", &i915_predictive_load_ctl}
};

int i915_debugfs_register(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index f8cfd16..79f4df5 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1397,6 +1397,10 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
if (ret < 0)
goto out_cleanup_hw;

+ /* Timer initialization for predictive load */
+ hrtimer_init(&dev_priv->pred_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ dev_priv->pred_timer.function = predictive_load_cb;
+
i915_driver_register(dev_priv);

intel_runtime_pm_enable(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4b9a8c5..a78fdbc 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1684,6 +1684,11 @@ struct drm_i915_private {
/* optimal slice/subslice/EU configuration state */
struct i915_sseu_optimum_config opt_config[LOAD_TYPE_LAST];

+ /* protects predictive load state */
+ struct mutex pred_mutex;
+ struct hrtimer pred_timer;
+ int predictive_load_enable;
+
unsigned int fsb_freq, mem_freq, is_ddr3;
unsigned int skl_preferred_vco_freq;
unsigned int max_cdclk_freq;
@@ -2730,6 +2735,7 @@ extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv);
extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
+extern enum hrtimer_restart predictive_load_cb(struct hrtimer *hrtimer);
int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);

int intel_engines_init_mmio(struct drm_i915_private *dev_priv);
--
2.7.4
Patchwork
2018-12-11 09:49:47 UTC
== Series Details ==

Series: series starting with [v3,1/4] drm/i915: Get active pending request for given context
URL : https://patchwork.freedesktop.org/series/53872/
State : failure

== Summary ==

Applying: drm/i915: Get active pending request for given context
Using index info to reconstruct a base tree...
M drivers/gpu/drm/i915/i915_gem_context.c
M drivers/gpu/drm/i915/i915_gem_context.h
M drivers/gpu/drm/i915/i915_request.c
M drivers/gpu/drm/i915/intel_lrc.c
Falling back to patching base and 3-way merge...
Auto-merging drivers/gpu/drm/i915/intel_lrc.c
Auto-merging drivers/gpu/drm/i915/i915_request.c
Auto-merging drivers/gpu/drm/i915/i915_gem_context.h
Auto-merging drivers/gpu/drm/i915/i915_gem_context.c
Applying: drm/i915: Update render power clock state configuration for given context
Using index info to reconstruct a base tree...
M drivers/gpu/drm/i915/i915_gem_context.c
M drivers/gpu/drm/i915/i915_gem_context.h
M drivers/gpu/drm/i915/intel_lrc.c
Falling back to patching base and 3-way merge...
Auto-merging drivers/gpu/drm/i915/intel_lrc.c
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/intel_lrc.c
Auto-merging drivers/gpu/drm/i915/i915_gem_context.h
Auto-merging drivers/gpu/drm/i915/i915_gem_context.c
error: Failed to merge in the changes.
Patch failed at 0002 drm/i915: Update render power clock state configuration for given context
Use 'git am --show-current-patch' to see the failed patch
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".