From 7f92e6c2aecf66f1ccf5d3137aaf1aa9905940d4 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 23 Jan 2019 16:05:58 -0800 Subject: [PATCH 01/89] drm/i915: initialize unused MOCS entries to PTE Instead of initializing them to uncached, let's set them to PTE for kernel tracking. While at it do some minor adjustments to comments and coding style. From Chris: "What it does mean is that the buffer contents are consistent with our cache tracking; and for userspace the results were always undefined. So we should at least be able to guarantee that the data written by userspace from the CPU is visible. After that, your caches are on your own". Signed-off-by: Lucas De Marchi Reviewed-by: Chris Wilson Reviewed-by: Tomasz Lis Link: https://patchwork.freedesktop.org/patch/msgid/20190124000604.18861-2-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/intel_mocs.c | 56 +++++++++++++------------------ 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index e976c5ce5479..0d6b94a239d6 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -85,10 +85,7 @@ struct drm_i915_mocs_table { * * Entries not part of the following tables are undefined as far as * userspace is concerned and shouldn't be relied upon. For the time - * being they will be implicitly initialized to the strictest caching - * configuration (uncached) to guarantee forwards compatibility with - * userspace programs written against more recent kernels providing - * additional MOCS entries. + * being they will be initialized to PTE. * * NOTE: These tables MUST start with being uncached and the length * MUST be less than 63 as the last two registers are reserved @@ -249,16 +246,13 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) table.table[index].control_value); /* - * Ok, now set the unused entries to uncached. These entries - * are officially undefined and no contract for the contents - * and settings is given for these entries. - * - * Entry 0 in the table is uncached - so we are just writing - * that value to all the used entries. + * Now set the unused entries to PTE. These entries are officially + * undefined and no contract for the contents and settings is given + * for these entries. */ for (; index < GEN9_NUM_MOCS_ENTRIES; index++) I915_WRITE(mocs_register(engine->id, index), - table.table[0].control_value); + table.table[I915_MOCS_PTE].control_value); } /** @@ -293,16 +287,13 @@ static int emit_mocs_control_table(struct i915_request *rq, } /* - * Ok, now set the unused entries to uncached. These entries - * are officially undefined and no contract for the contents - * and settings is given for these entries. - * - * Entry 0 in the table is uncached - so we are just writing - * that value to all the used entries. + * Now set the unused entries to PTE. These entries are officially + * undefined and no contract for the contents and settings is given + * for these entries. 
*/ for (; index < GEN9_NUM_MOCS_ENTRIES; index++) { *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); - *cs++ = table->table[0].control_value; + *cs++ = table->table[I915_MOCS_PTE].control_value; } *cs++ = MI_NOOP; @@ -345,7 +336,7 @@ static int emit_mocs_l3cc_table(struct i915_request *rq, *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2); - for (i = 0; i < table->size/2; i++) { + for (i = 0; i < table->size / 2; i++) { *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); *cs++ = l3cc_combine(table, 2 * i, 2 * i + 1); } @@ -353,18 +344,18 @@ static int emit_mocs_l3cc_table(struct i915_request *rq, if (table->size & 0x01) { /* Odd table size - 1 left over */ *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, 2 * i, 0); + *cs++ = l3cc_combine(table, 2 * i, I915_MOCS_PTE); i++; } /* - * Now set the rest of the table to uncached - use entry 0 as - * this will be uncached. Leave the last pair uninitialised as - * they are reserved by the hardware. + * Now set the unused entries to PTE. These entries are officially + * undefined and no contract for the contents and settings is given + * for these entries. */ for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) { *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, 0, 0); + *cs++ = l3cc_combine(table, I915_MOCS_PTE, I915_MOCS_PTE); } *cs++ = MI_NOOP; @@ -395,22 +386,21 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv) if (!get_mocs_settings(dev_priv, &table)) return; - for (i = 0; i < table.size/2; i++) - I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 2*i, 2*i+1)); + for (i = 0; i < table.size / 2; i++) + I915_WRITE(GEN9_LNCFCMOCS(i), + l3cc_combine(&table, 2 * i, 2 * i + 1)); /* Odd table size - 1 left over */ if (table.size & 0x01) { - I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 2*i, 0)); + I915_WRITE(GEN9_LNCFCMOCS(i), + l3cc_combine(&table, 2 * i, I915_MOCS_PTE)); i++; } - /* - * Now set the rest of the table to uncached - use entry 0 as - * this will be uncached. Leave the last pair as initialised as - * they are reserved by the hardware. - */ + /* Now set the rest of the table to PTE */ for (; i < (GEN9_NUM_MOCS_ENTRIES / 2); i++) - I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0)); + I915_WRITE(GEN9_LNCFCMOCS(i), + l3cc_combine(&table, I915_MOCS_PTE, I915_MOCS_PTE)); } /** From d7a43c3ba607207bc51d859e3bd450047519a06f Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 23 Jan 2019 16:05:59 -0800 Subject: [PATCH 02/89] drm/i915: Simplify MOCS table definition Make the defines for the LE and L3 caching options contain the shifts, and remove the zeros from the tables, since shifting zero always results in zero. Starting from Ice Lake the MOCS table is defined in the spec and contains all entries. So, to simplify checking the table against the values set in code, the value is now part of the macro name. This still gives the most-used options sensible names, while also making it easy to cross-check the table against the spec for gen >= 11. By removing the zeros we avoid maintaining a huge table, since the one from the spec contains many more entries. The new table for Ice Lake will be added by other patches; this one only reformats the existing tables. While at it, also fix the indentation.
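As an illustration of the pattern (a sketch distilled from the diff below, not additional code being introduced), carrying the shift inside the named define lets zero-valued fields simply be omitted:

    /* shift helpers, as in the diff below */
    #define _LE_CACHEABILITY(value)  ((value) << 0)
    #define _LE_TGT_CACHE(value)     ((value) << 2)

    /* the selected value becomes part of the macro name */
    #define LE_1_UC                  _LE_CACHEABILITY(1)
    #define LE_TC_2_LLC_ELLC         _LE_TGT_CACHE(2)

    /* before: every field spelled out, zeros included */
    .control_value = LE_CACHEABILITY(LE_UC) |
                     LE_TGT_CACHE(LE_TC_LLC_ELLC) |
                     LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) |
                     LE_SCC(0) | LE_PFM(0) | LE_SCF(0),

    /* after: any field left out contributes zero */
    .control_value = LE_1_UC | LE_TC_2_LLC_ELLC,    /* 0x00000009 */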
Signed-off-by: Lucas De Marchi Reviewed-by: Tomasz Lis Link: https://patchwork.freedesktop.org/patch/msgid/20190124000604.18861-3-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/intel_mocs.c | 80 +++++++++++-------------------- 1 file changed, 29 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 0d6b94a239d6..4ea80bb7dcc8 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -36,8 +36,8 @@ struct drm_i915_mocs_table { }; /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ -#define LE_CACHEABILITY(value) ((value) << 0) -#define LE_TGT_CACHE(value) ((value) << 2) +#define _LE_CACHEABILITY(value) ((value) << 0) +#define _LE_TGT_CACHE(value) ((value) << 2) #define LE_LRUM(value) ((value) << 4) #define LE_AOM(value) ((value) << 6) #define LE_RSC(value) ((value) << 7) @@ -48,28 +48,28 @@ struct drm_i915_mocs_table { /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */ #define L3_ESC(value) ((value) << 0) #define L3_SCC(value) ((value) << 1) -#define L3_CACHEABILITY(value) ((value) << 4) +#define _L3_CACHEABILITY(value) ((value) << 4) /* Helper defines */ #define GEN9_NUM_MOCS_ENTRIES 62 /* 62 out of 64 - 63 & 64 are reserved. */ /* (e)LLC caching options */ -#define LE_PAGETABLE 0 -#define LE_UC 1 -#define LE_WT 2 -#define LE_WB 3 - -/* L3 caching options */ -#define L3_DIRECT 0 -#define L3_UC 1 -#define L3_RESERVED 2 -#define L3_WB 3 +#define LE_0_PAGETABLE _LE_CACHEABILITY(0) +#define LE_1_UC _LE_CACHEABILITY(1) +#define LE_2_WT _LE_CACHEABILITY(2) +#define LE_3_WB _LE_CACHEABILITY(3) /* Target cache */ -#define LE_TC_PAGETABLE 0 -#define LE_TC_LLC 1 -#define LE_TC_LLC_ELLC 2 -#define LE_TC_LLC_ELLC_ALT 3 +#define LE_TC_0_PAGETABLE _LE_TGT_CACHE(0) +#define LE_TC_1_LLC _LE_TGT_CACHE(1) +#define LE_TC_2_LLC_ELLC _LE_TGT_CACHE(2) +#define LE_TC_3_LLC_ELLC_ALT _LE_TGT_CACHE(3) + +/* L3 caching options */ +#define L3_0_DIRECT _L3_CACHEABILITY(0) +#define L3_1_UC _L3_CACHEABILITY(1) +#define L3_2_RESERVED _L3_CACHEABILITY(2) +#define L3_3_WB _L3_CACHEABILITY(3) /* * MOCS tables @@ -96,31 +96,21 @@ struct drm_i915_mocs_table { static const struct drm_i915_mocs_entry skylake_mocs_table[] = { [I915_MOCS_UNCACHED] = { /* 0x00000009 */ - .control_value = LE_CACHEABILITY(LE_UC) | - LE_TGT_CACHE(LE_TC_LLC_ELLC) | - LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | - LE_PFM(0) | LE_SCF(0), - + .control_value = LE_1_UC | LE_TC_2_LLC_ELLC, /* 0x0010 */ - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), + .l3cc_value = L3_1_UC, }, [I915_MOCS_PTE] = { /* 0x00000038 */ - .control_value = LE_CACHEABILITY(LE_PAGETABLE) | - LE_TGT_CACHE(LE_TC_LLC_ELLC) | - LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | - LE_PFM(0) | LE_SCF(0), + .control_value = LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), /* 0x0030 */ - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), + .l3cc_value = L3_3_WB, }, [I915_MOCS_CACHED] = { /* 0x0000003b */ - .control_value = LE_CACHEABILITY(LE_WB) | - LE_TGT_CACHE(LE_TC_LLC_ELLC) | - LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | - LE_PFM(0) | LE_SCF(0), + .control_value = LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3), /* 0x0030 */ - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), + .l3cc_value = L3_3_WB, }, }; @@ -128,33 +118,21 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { static const struct drm_i915_mocs_entry broxton_mocs_table[] = { [I915_MOCS_UNCACHED] = { /* 0x00000009 */ - .control_value = 
LE_CACHEABILITY(LE_UC) | - LE_TGT_CACHE(LE_TC_LLC_ELLC) | - LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | - LE_PFM(0) | LE_SCF(0), - + .control_value = LE_1_UC | LE_TC_2_LLC_ELLC, /* 0x0010 */ - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), + .l3cc_value = L3_1_UC, }, [I915_MOCS_PTE] = { /* 0x00000038 */ - .control_value = LE_CACHEABILITY(LE_PAGETABLE) | - LE_TGT_CACHE(LE_TC_LLC_ELLC) | - LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | - LE_PFM(0) | LE_SCF(0), - + .control_value = LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), /* 0x0030 */ - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), + .l3cc_value = L3_3_WB, }, [I915_MOCS_CACHED] = { /* 0x00000039 */ - .control_value = LE_CACHEABILITY(LE_UC) | - LE_TGT_CACHE(LE_TC_LLC_ELLC) | - LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | - LE_PFM(0) | LE_SCF(0), - + .control_value = LE_1_UC | LE_TC_2_LLC_ELLC | LE_LRUM(3), /* 0x0030 */ - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), + .l3cc_value = L3_3_WB, }, }; From 66f996052f955fe338c7cd6ca2099cfdf396b2bf Mon Sep 17 00:00:00 2001 From: Tomasz Lis Date: Wed, 23 Jan 2019 16:06:00 -0800 Subject: [PATCH 03/89] drm/i915/skl: Rework MOCS tables to keep common part in a define The MOCS tables are going to be very similar across platforms. To reduce the amount of copied code, this patch rips the common part and puts it into a definition valid for all gen9 platforms. v2: Made defines for or-ing flags. Renamed macros from MOCS_TABLE to MOCS_ENTRIES. (Joonas) v3 (Lucas): - Fix indentation - Rebase on rework done by additional patch - Remove define for or-ing flags as it made the table more complex by requiring zeroed values to be passed - Do not embed comma in the macro, so to treat that just as another item and please source code formatting tools Signed-off-by: Tomasz Lis Suggested-by: Lucas De Marchi Signed-off-by: Lucas De Marchi Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190124000604.18861-4-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/intel_mocs.c | 57 ++++++++++++++----------------- 1 file changed, 25 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 4ea80bb7dcc8..c7a2a8d81d90 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -93,46 +93,39 @@ struct drm_i915_mocs_table { * may only be updated incrementally by adding entries at the * end. 
*/ + +#define GEN9_MOCS_ENTRIES \ + [I915_MOCS_UNCACHED] = { \ + /* 0x00000009 */ \ + .control_value = LE_1_UC | LE_TC_2_LLC_ELLC, \ + /* 0x0010 */ \ + .l3cc_value = L3_1_UC, \ + }, \ + [I915_MOCS_PTE] = { \ + /* 0x00000038 */ \ + .control_value = LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \ + /* 0x0030 */ \ + .l3cc_value = L3_3_WB, \ + } + static const struct drm_i915_mocs_entry skylake_mocs_table[] = { - [I915_MOCS_UNCACHED] = { - /* 0x00000009 */ - .control_value = LE_1_UC | LE_TC_2_LLC_ELLC, - /* 0x0010 */ - .l3cc_value = L3_1_UC, - }, - [I915_MOCS_PTE] = { - /* 0x00000038 */ - .control_value = LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), - /* 0x0030 */ - .l3cc_value = L3_3_WB, - }, + GEN9_MOCS_ENTRIES, [I915_MOCS_CACHED] = { - /* 0x0000003b */ - .control_value = LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3), - /* 0x0030 */ - .l3cc_value = L3_3_WB, + /* 0x0000003b */ + .control_value = LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3), + /* 0x0030 */ + .l3cc_value = L3_3_WB, }, }; /* NOTE: the LE_TGT_CACHE is not used on Broxton */ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { - [I915_MOCS_UNCACHED] = { - /* 0x00000009 */ - .control_value = LE_1_UC | LE_TC_2_LLC_ELLC, - /* 0x0010 */ - .l3cc_value = L3_1_UC, - }, - [I915_MOCS_PTE] = { - /* 0x00000038 */ - .control_value = LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), - /* 0x0030 */ - .l3cc_value = L3_3_WB, - }, + GEN9_MOCS_ENTRIES, [I915_MOCS_CACHED] = { - /* 0x00000039 */ - .control_value = LE_1_UC | LE_TC_2_LLC_ELLC | LE_LRUM(3), - /* 0x0030 */ - .l3cc_value = L3_3_WB, + /* 0x00000039 */ + .control_value = LE_1_UC | LE_TC_2_LLC_ELLC | LE_LRUM(3), + /* 0x0030 */ + .l3cc_value = L3_3_WB, }, }; From 828f31502045fbf4354723169b1b64e7719b8de7 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 23 Jan 2019 16:06:01 -0800 Subject: [PATCH 04/89] drm/i915: use a macro to define MOCS entries Let's use a macro to make tables smaller and at the same time allow us to add fields that apply to all entries in future. v2: rewrap lines to respect 80 chars limit and make it more readable (from Chris) Signed-off-by: Lucas De Marchi Reviewed-by: Tomasz Lis Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190124000604.18861-5-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/intel_mocs.c | 43 +++++++++++++------------------ 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index c7a2a8d81d90..59dd74765288 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -71,6 +71,12 @@ struct drm_i915_mocs_table { #define L3_2_RESERVED _L3_CACHEABILITY(2) #define L3_3_WB _L3_CACHEABILITY(3) +#define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \ + [__idx] = { \ + .control_value = __control_value, \ + .l3cc_value = __l3cc_value, \ + } + /* * MOCS tables * @@ -93,40 +99,27 @@ struct drm_i915_mocs_table { * may only be updated incrementally by adding entries at the * end. 
*/ - #define GEN9_MOCS_ENTRIES \ - [I915_MOCS_UNCACHED] = { \ - /* 0x00000009 */ \ - .control_value = LE_1_UC | LE_TC_2_LLC_ELLC, \ - /* 0x0010 */ \ - .l3cc_value = L3_1_UC, \ - }, \ - [I915_MOCS_PTE] = { \ - /* 0x00000038 */ \ - .control_value = LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \ - /* 0x0030 */ \ - .l3cc_value = L3_3_WB, \ - } + MOCS_ENTRY(I915_MOCS_UNCACHED, \ + LE_1_UC | LE_TC_2_LLC_ELLC, \ + L3_1_UC), \ + MOCS_ENTRY(I915_MOCS_PTE, \ + LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \ + L3_3_WB) static const struct drm_i915_mocs_entry skylake_mocs_table[] = { GEN9_MOCS_ENTRIES, - [I915_MOCS_CACHED] = { - /* 0x0000003b */ - .control_value = LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3), - /* 0x0030 */ - .l3cc_value = L3_3_WB, - }, + MOCS_ENTRY(I915_MOCS_CACHED, + LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3), + L3_3_WB) }; /* NOTE: the LE_TGT_CACHE is not used on Broxton */ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { GEN9_MOCS_ENTRIES, - [I915_MOCS_CACHED] = { - /* 0x00000039 */ - .control_value = LE_1_UC | LE_TC_2_LLC_ELLC | LE_LRUM(3), - /* 0x0030 */ - .l3cc_value = L3_3_WB, - }, + MOCS_ENTRY(I915_MOCS_CACHED, + LE_1_UC | LE_TC_2_LLC_ELLC | LE_LRUM(3), + L3_3_WB) }; /** From 1878fce8de256dd0d263c670ae86b96b82cac735 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 23 Jan 2019 16:06:02 -0800 Subject: [PATCH 05/89] drm/i915: keep track of used entries in MOCS table Instead of assuming we have defined entries for every index in the table, let's keep track of the ones we explicitly defined. This will allow Gen 11 to have its new table defined, in which there are holes of undefined entries. Repeated comments about the meaning of undefined entries were removed since they are overly verbose and copy-pasted into several functions: now the definition is at the top only. v2: add helper function to get the index (from Chris) Signed-off-by: Lucas De Marchi Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190124000604.18861-6-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/intel_mocs.c | 111 +++++++++++++++++++++--------- 1 file changed, 78 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 59dd74765288..e5f0e9da258c 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -28,6 +28,7 @@ struct drm_i915_mocs_entry { u32 control_value; u16 l3cc_value; + u16 used; }; struct drm_i915_mocs_table { @@ -75,6 +76,7 @@ struct drm_i915_mocs_table { [__idx] = { \ .control_value = __control_value, \ .l3cc_value = __l3cc_value, \ + .used = 1, \ } /* @@ -187,6 +189,19 @@ static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index) } } +/* + * Get control_value from MOCS entry taking into account when it's not used: + * I915_MOCS_PTE's value is returned in this case. + */ +static u32 get_entry_control(const struct drm_i915_mocs_table *table, + unsigned int index) +{ + if (table->table[index].used) + return table->table[index].control_value; + + return table->table[I915_MOCS_PTE].control_value; +} + /** * intel_mocs_init_engine() - emit the mocs control table * @engine: The engine for whom to emit the registers. 
@@ -199,24 +214,25 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) struct drm_i915_private *dev_priv = engine->i915; struct drm_i915_mocs_table table; unsigned int index; + u32 unused_value; if (!get_mocs_settings(dev_priv, &table)) return; GEM_BUG_ON(table.size > GEN9_NUM_MOCS_ENTRIES); - for (index = 0; index < table.size; index++) - I915_WRITE(mocs_register(engine->id, index), - table.table[index].control_value); + /* Set unused values to PTE */ + unused_value = table.table[I915_MOCS_PTE].control_value; - /* - * Now set the unused entries to PTE. These entries are officially - * undefined and no contract for the contents and settings is given - * for these entries. - */ + for (index = 0; index < table.size; index++) { + u32 value = get_entry_control(&table, index); + + I915_WRITE(mocs_register(engine->id, index), value); + } + + /* All remaining entries are also unused */ for (; index < GEN9_NUM_MOCS_ENTRIES; index++) - I915_WRITE(mocs_register(engine->id, index), - table.table[I915_MOCS_PTE].control_value); + I915_WRITE(mocs_register(engine->id, index), unused_value); } /** @@ -234,11 +250,15 @@ static int emit_mocs_control_table(struct i915_request *rq, { enum intel_engine_id engine = rq->engine->id; unsigned int index; + u32 unused_value; u32 *cs; if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) return -ENODEV; + /* Set unused values to PTE */ + unused_value = table->table[I915_MOCS_PTE].control_value; + cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -246,18 +266,16 @@ static int emit_mocs_control_table(struct i915_request *rq, *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES); for (index = 0; index < table->size; index++) { + u32 value = get_entry_control(table, index); + *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); - *cs++ = table->table[index].control_value; + *cs++ = value; } - /* - * Now set the unused entries to PTE. These entries are officially - * undefined and no contract for the contents and settings is given - * for these entries. - */ + /* All remaining entries are also unused */ for (; index < GEN9_NUM_MOCS_ENTRIES; index++) { *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); - *cs++ = table->table[I915_MOCS_PTE].control_value; + *cs++ = unused_value; } *cs++ = MI_NOOP; @@ -266,12 +284,24 @@ static int emit_mocs_control_table(struct i915_request *rq, return 0; } +/* + * Get l3cc_value from MOCS entry taking into account when it's not used: + * I915_MOCS_PTE's value is returned in this case. 
+ */ +static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table, + unsigned int index) +{ + if (table->table[index].used) + return table->table[index].l3cc_value; + + return table->table[I915_MOCS_PTE].l3cc_value; +} + static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, u16 low, u16 high) { - return table->table[low].l3cc_value | - table->table[high].l3cc_value << 16; + return low | high << 16; } /** @@ -288,12 +318,16 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, static int emit_mocs_l3cc_table(struct i915_request *rq, const struct drm_i915_mocs_table *table) { + u16 unused_value; unsigned int i; u32 *cs; if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) return -ENODEV; + /* Set unused values to PTE */ + unused_value = table->table[I915_MOCS_PTE].l3cc_value; + cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -301,25 +335,26 @@ static int emit_mocs_l3cc_table(struct i915_request *rq, *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2); for (i = 0; i < table->size / 2; i++) { + u16 low = get_entry_l3cc(table, 2 * i); + u16 high = get_entry_l3cc(table, 2 * i + 1); + *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, 2 * i, 2 * i + 1); + *cs++ = l3cc_combine(table, low, high); } + /* Odd table size - 1 left over */ if (table->size & 0x01) { - /* Odd table size - 1 left over */ + u16 low = get_entry_l3cc(table, 2 * i); + *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, 2 * i, I915_MOCS_PTE); + *cs++ = l3cc_combine(table, low, unused_value); i++; } - /* - * Now set the unused entries to PTE. These entries are officially - * undefined and no contract for the contents and settings is given - * for these entries. - */ + /* All remaining entries are also unused */ for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) { *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, I915_MOCS_PTE, I915_MOCS_PTE); + *cs++ = l3cc_combine(table, unused_value, unused_value); } *cs++ = MI_NOOP; @@ -346,25 +381,35 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv) { struct drm_i915_mocs_table table; unsigned int i; + u16 unused_value; if (!get_mocs_settings(dev_priv, &table)) return; - for (i = 0; i < table.size / 2; i++) + /* Set unused values to PTE */ + unused_value = table.table[I915_MOCS_PTE].l3cc_value; + + for (i = 0; i < table.size / 2; i++) { + u16 low = get_entry_l3cc(&table, 2 * i); + u16 high = get_entry_l3cc(&table, 2 * i + 1); + I915_WRITE(GEN9_LNCFCMOCS(i), - l3cc_combine(&table, 2 * i, 2 * i + 1)); + l3cc_combine(&table, low, high)); + } /* Odd table size - 1 left over */ if (table.size & 0x01) { + u16 low = get_entry_l3cc(&table, 2 * i); + I915_WRITE(GEN9_LNCFCMOCS(i), - l3cc_combine(&table, 2 * i, I915_MOCS_PTE)); + l3cc_combine(&table, low, unused_value)); i++; } - /* Now set the rest of the table to PTE */ + /* All remaining entries are also unused */ for (; i < (GEN9_NUM_MOCS_ENTRIES / 2); i++) I915_WRITE(GEN9_LNCFCMOCS(i), - l3cc_combine(&table, I915_MOCS_PTE, I915_MOCS_PTE)); + l3cc_combine(&table, unused_value, unused_value)); } /** From 5029537f4fbb30d2640d0cda86e35b6316e7464e Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 23 Jan 2019 16:06:03 -0800 Subject: [PATCH 06/89] drm/i915: cache number of MOCS entries Instead of checking the gen number every time we need to know the max number of entries, just save it into the table struct so we don't need extra branches throughout the code. 
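As a sketch of the idea (the real definitions are in the diff below), the table now carries its own limit and the loops read it directly:

    struct drm_i915_mocs_table {
            unsigned int size;      /* entries defined in @table */
            unsigned int n_entries; /* entries exposed by the platform */
            const struct drm_i915_mocs_entry *table;
    };

    /* e.g. in intel_mocs_init_engine(), programming the undefined tail */
    for (; index < table.n_entries; index++)
            I915_WRITE(mocs_register(engine->id, index), unused_value);

rather than hardcoding GEN9_NUM_MOCS_ENTRIES at each use.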
This will be useful for Ice Lake that has 64 rather than 62 defined entries. Ice Lake changes will be added in a follow up. v2: make size and n_entries `unsigned int` and introduce changes as a pre-work for the Ice Lake changes (Tvrtko) Suggested-by: Tvrtko Ursulin Signed-off-by: Lucas De Marchi Reviewed-by: Chris Wilson Reviewed-by: Tomasz Lis Link: https://patchwork.freedesktop.org/patch/msgid/20190124000604.18861-7-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/intel_mocs.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index e5f0e9da258c..6e293f91a10c 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -32,7 +32,8 @@ struct drm_i915_mocs_entry { }; struct drm_i915_mocs_table { - u32 size; + unsigned int size; + unsigned int n_entries; const struct drm_i915_mocs_entry *table; }; @@ -144,10 +145,12 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) || IS_ICELAKE(dev_priv)) { table->size = ARRAY_SIZE(skylake_mocs_table); + table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->table = skylake_mocs_table; result = true; } else if (IS_GEN9_LP(dev_priv)) { table->size = ARRAY_SIZE(broxton_mocs_table); + table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->table = broxton_mocs_table; result = true; } else { @@ -219,8 +222,6 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) if (!get_mocs_settings(dev_priv, &table)) return; - GEM_BUG_ON(table.size > GEN9_NUM_MOCS_ENTRIES); - /* Set unused values to PTE */ unused_value = table.table[I915_MOCS_PTE].control_value; @@ -231,7 +232,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) } /* All remaining entries are also unused */ - for (; index < GEN9_NUM_MOCS_ENTRIES; index++) + for (; index < table.n_entries; index++) I915_WRITE(mocs_register(engine->id, index), unused_value); } @@ -253,17 +254,17 @@ static int emit_mocs_control_table(struct i915_request *rq, u32 unused_value; u32 *cs; - if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) + if (GEM_WARN_ON(table->size > table->n_entries)) return -ENODEV; /* Set unused values to PTE */ unused_value = table->table[I915_MOCS_PTE].control_value; - cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); + cs = intel_ring_begin(rq, 2 + 2 * table->n_entries); if (IS_ERR(cs)) return PTR_ERR(cs); - *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES); + *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries); for (index = 0; index < table->size; index++) { u32 value = get_entry_control(table, index); @@ -273,7 +274,7 @@ static int emit_mocs_control_table(struct i915_request *rq, } /* All remaining entries are also unused */ - for (; index < GEN9_NUM_MOCS_ENTRIES; index++) { + for (; index < table->n_entries; index++) { *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); *cs++ = unused_value; } @@ -322,17 +323,17 @@ static int emit_mocs_l3cc_table(struct i915_request *rq, unsigned int i; u32 *cs; - if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) + if (GEM_WARN_ON(table->size > table->n_entries)) return -ENODEV; /* Set unused values to PTE */ unused_value = table->table[I915_MOCS_PTE].l3cc_value; - cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES); + cs = intel_ring_begin(rq, 2 + table->n_entries); if (IS_ERR(cs)) return PTR_ERR(cs); - *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2); + *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2); for (i = 0; i < 
table->size / 2; i++) { u16 low = get_entry_l3cc(table, 2 * i); @@ -352,7 +353,7 @@ static int emit_mocs_l3cc_table(struct i915_request *rq, } /* All remaining entries are also unused */ - for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) { + for (; i < table->n_entries / 2; i++) { *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); *cs++ = l3cc_combine(table, unused_value, unused_value); } @@ -407,7 +408,7 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv) } /* All remaining entries are also unused */ - for (; i < (GEN9_NUM_MOCS_ENTRIES / 2); i++) + for (; i < table.n_entries / 2; i++) I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, unused_value, unused_value)); } From b3c316b0b869252481acc8d88138aae7619de582 Mon Sep 17 00:00:00 2001 From: Tomasz Lis Date: Wed, 23 Jan 2019 16:06:04 -0800 Subject: [PATCH 07/89] drm/i915/icl: Define MOCS table for Icelake The table has been unified across OSes to minimize virtualization overhead. The MOCS table is now published as part of bspec, and versioned. Entries are supposed to never be modified, but new ones can be added. Adding entries increases table version. The patch includes version 1 entries. Meaning of each entry is now explained in bspec, and user mode clients are expected to know what each entry means. The 3 entries used for previous platforms are still compatible with their legacy definitions, but that is not guaranteed to be true for future platforms. v2: Fixed SCC values, improved commit comment (Daniele) v3: Improved MOCS table comment (Daniele) v4: Moved new entries below gen9 ones. Put common entries into definition to be used in multiple arrays. (Lucas) v5: Made defines for or-ing flags. Renamed macros from MOCS_TABLE to MOCS_ENTRIES. Switched LE_CoS to upper case. (Joonas) v6: Removed definitions of reserved entries. (Michal) Increased limit of entries sent to the hardware on gen11+. v7: Simplify table as done for previou gens (Lucas) v8: Rebase on cached number of entries per-platform and use new MOCS_ENTRY() macro (Lucas) v9: Update comment (from Tomasz) BSpec: 34007 BSpec: 560 Signed-off-by: Tomasz Lis Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20190124000604.18861-8-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/intel_mocs.c | 132 ++++++++++++++++++++++++++++-- 1 file changed, 123 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 6e293f91a10c..331e7a678fb7 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -46,6 +46,8 @@ struct drm_i915_mocs_table { #define LE_SCC(value) ((value) << 8) #define LE_PFM(value) ((value) << 11) #define LE_SCF(value) ((value) << 14) +#define LE_COS(value) ((value) << 15) +#define LE_SSE(value) ((value) << 17) /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */ #define L3_ESC(value) ((value) << 0) @@ -54,6 +56,7 @@ struct drm_i915_mocs_table { /* Helper defines */ #define GEN9_NUM_MOCS_ENTRIES 62 /* 62 out of 64 - 63 & 64 are reserved. */ +#define GEN11_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ /* (e)LLC caching options */ #define LE_0_PAGETABLE _LE_CACHEABILITY(0) @@ -89,18 +92,23 @@ struct drm_i915_mocs_table { * LNCFCMOCS0 - LNCFCMOCS32 registers. * * These tables are intended to be kept reasonably consistent across - * platforms. However some of the fields are not applicable to all of - * them. + * HW platforms, and for ICL+, be identical across OSes. 
To achieve + * that, for Icelake and above, list of entries is published as part + * of bspec. * * Entries not part of the following tables are undefined as far as * userspace is concerned and shouldn't be relied upon. For the time * being they will be initialized to PTE. * - * NOTE: These tables MUST start with being uncached and the length - * MUST be less than 63 as the last two registers are reserved - * by the hardware. These tables are part of the kernel ABI and - * may only be updated incrementally by adding entries at the - * end. + * The last two entries are reserved by the hardware. For ICL+ they + * should be initialized according to bspec and never used, for older + * platforms they should never be written to. + * + * NOTE: These tables are part of bspec and defined as part of hardware + * interface for ICL+. For older platforms, they are part of kernel + * ABI. It is expected that, for specific hardware platform, existing + * entries will remain constant and the table will only be updated by + * adding new entries, filling unused positions. */ #define GEN9_MOCS_ENTRIES \ MOCS_ENTRY(I915_MOCS_UNCACHED, \ @@ -125,6 +133,108 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { L3_3_WB) }; +#define GEN11_MOCS_ENTRIES \ + /* Base - Uncached (Deprecated) */ \ + MOCS_ENTRY(I915_MOCS_UNCACHED, \ + LE_1_UC | LE_TC_1_LLC, \ + L3_1_UC), \ + /* Base - L3 + LeCC:PAT (Deprecated) */ \ + MOCS_ENTRY(I915_MOCS_PTE, \ + LE_0_PAGETABLE | LE_TC_1_LLC, \ + L3_3_WB), \ + /* Base - L3 + LLC */ \ + MOCS_ENTRY(2, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_3_WB), \ + /* Base - Uncached */ \ + MOCS_ENTRY(3, \ + LE_1_UC | LE_TC_1_LLC, \ + L3_1_UC), \ + /* Base - L3 */ \ + MOCS_ENTRY(4, \ + LE_1_UC | LE_TC_1_LLC, \ + L3_3_WB), \ + /* Base - LLC */ \ + MOCS_ENTRY(5, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_1_UC), \ + /* Age 0 - LLC */ \ + MOCS_ENTRY(6, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \ + L3_1_UC), \ + /* Age 0 - L3 + LLC */ \ + MOCS_ENTRY(7, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \ + L3_3_WB), \ + /* Age: Don't Chg. - LLC */ \ + MOCS_ENTRY(8, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \ + L3_1_UC), \ + /* Age: Don't Chg. 
- L3 + LLC */ \ + MOCS_ENTRY(9, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \ + L3_3_WB), \ + /* No AOM - LLC */ \ + MOCS_ENTRY(10, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \ + L3_1_UC), \ + /* No AOM - L3 + LLC */ \ + MOCS_ENTRY(11, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \ + L3_3_WB), \ + /* No AOM; Age 0 - LLC */ \ + MOCS_ENTRY(12, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \ + L3_1_UC), \ + /* No AOM; Age 0 - L3 + LLC */ \ + MOCS_ENTRY(13, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \ + L3_3_WB), \ + /* No AOM; Age:DC - LLC */ \ + MOCS_ENTRY(14, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ + L3_1_UC), \ + /* No AOM; Age:DC - L3 + LLC */ \ + MOCS_ENTRY(15, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ + L3_3_WB), \ + /* Self-Snoop - L3 + LLC */ \ + MOCS_ENTRY(18, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(12.5%) */ \ + MOCS_ENTRY(19, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(25%) */ \ + MOCS_ENTRY(20, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(50%) */ \ + MOCS_ENTRY(21, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(75%) */ \ + MOCS_ENTRY(22, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(87.5%) */ \ + MOCS_ENTRY(23, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \ + L3_3_WB), \ + /* HW Reserved - SW program but never use */ \ + MOCS_ENTRY(62, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_1_UC), \ + /* HW Reserved - SW program but never use */ \ + MOCS_ENTRY(63, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_1_UC) + +static const struct drm_i915_mocs_entry icelake_mocs_table[] = { + GEN11_MOCS_ENTRIES +}; + /** * get_mocs_settings() * @dev_priv: i915 device. @@ -142,8 +252,12 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, { bool result = false; - if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) || - IS_ICELAKE(dev_priv)) { + if (IS_ICELAKE(dev_priv)) { + table->size = ARRAY_SIZE(icelake_mocs_table); + table->table = icelake_mocs_table; + table->n_entries = GEN11_NUM_MOCS_ENTRIES; + result = true; + } else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { table->size = ARRAY_SIZE(skylake_mocs_table); table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->table = skylake_mocs_table; From 8e525cb4a622148fbe30134ee3a1a34ad839a43a Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 25 Jan 2019 02:29:33 +0000 Subject: [PATCH 08/89] drm/i915/execlists: Move RPCS setup to context pin Configuring RPCS in context image just before pin is sufficient and will come extra handy in one of the following patches. v2: * Split image setup a bit differently. (Chris Wilson) v3: * Update context image after reset as well - otherwise the application of pinned default state clears the RPCS. v4: * Use local variable throughout the function. 
(Chris Wilson) Signed-off-by: Tvrtko Ursulin Suggested-by: Chris Wilson Cc: Chris Wilson Reviewed-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20190125023005.1007-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 45 ++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8aa8a4862543..9155cc675924 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1173,6 +1173,24 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma) return i915_vma_pin(vma, 0, 0, flags); } +static u32 make_rpcs(struct drm_i915_private *dev_priv); + +static void +__execlists_update_reg_state(struct intel_engine_cs *engine, + struct intel_context *ce) +{ + u32 *regs = ce->lrc_reg_state; + struct intel_ring *ring = ce->ring; + + regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ring->vma); + regs[CTX_RING_HEAD + 1] = ring->head; + regs[CTX_RING_TAIL + 1] = ring->tail; + + /* RPCS */ + if (engine->class == RENDER_CLASS) + regs[CTX_R_PWR_CLK_STATE + 1] = make_rpcs(engine->i915); +} + static struct intel_context * __execlists_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx, @@ -1211,10 +1229,8 @@ __execlists_context_pin(struct intel_engine_cs *engine, GEM_BUG_ON(!intel_ring_offset_valid(ce->ring, ce->ring->head)); ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; - ce->lrc_reg_state[CTX_RING_BUFFER_START+1] = - i915_ggtt_offset(ce->ring->vma); - ce->lrc_reg_state[CTX_RING_HEAD + 1] = ce->ring->head; - ce->lrc_reg_state[CTX_RING_TAIL + 1] = ce->ring->tail; + + __execlists_update_reg_state(engine, ce); ce->state->obj->pin_global++; i915_gem_context_get(ctx); @@ -1838,14 +1854,14 @@ static void execlists_reset(struct intel_engine_cs *engine, engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, engine->context_size - PAGE_SIZE); } - execlists_init_reg_state(regs, - request->gem_context, engine, request->ring); /* Move the RING_HEAD onto the breadcrumb, past the hanging batch */ - regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma); - request->ring->head = intel_ring_wrap(request->ring, request->postfix); - regs[CTX_RING_HEAD + 1] = request->ring->head; + + execlists_init_reg_state(regs, request->gem_context, engine, + request->ring); + + __execlists_update_reg_state(engine, request->hw_context); intel_ring_update_space(request->ring); @@ -2534,8 +2550,7 @@ static void execlists_init_reg_state(u32 *regs, if (rcs) { regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); - CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, - make_rpcs(dev_priv)); + CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0); i915_oa_init_reg_state(engine, ctx, regs); } @@ -2696,12 +2711,8 @@ void intel_lr_context_resume(struct drm_i915_private *i915) intel_ring_reset(ce->ring, 0); - if (ce->pin_count) { /* otherwise done in context_pin */ - u32 *regs = ce->lrc_reg_state; - - regs[CTX_RING_HEAD + 1] = ce->ring->head; - regs[CTX_RING_TAIL + 1] = ce->ring->tail; - } + if (ce->pin_count) /* otherwise done in context_pin */ + __execlists_update_reg_state(engine, ce); } } } From e1a73a54a96e80dc6009e73c9209e4f81ae22285 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 25 Jan 2019 10:05:20 +0000 Subject: [PATCH 09/89] drm/i915: Measure the required reserved size for request emission Instead of tediously and fragilely counting up the number of dwords required 
to emit the breadcrumb to seal a request, fake a request and measure it automatically once during engine setup. The downside is that this requires a fair amount of mocking to create a proper breadcrumb. Still, should be less error prone in future as the breadcrumb size fluctuates! Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190125100520.20163-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 49 ++++++++++++++++++++ drivers/gpu/drm/i915/intel_lrc.c | 12 +++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 24 +++++++--- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- drivers/gpu/drm/i915/selftests/mock_engine.c | 4 +- 5 files changed, 77 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 2f3c71f6d313..8f738a7cd117 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -604,6 +604,47 @@ static void __intel_context_unpin(struct i915_gem_context *ctx, intel_context_unpin(to_intel_context(ctx, engine)); } +struct measure_breadcrumb { + struct i915_request rq; + struct i915_timeline timeline; + struct intel_ring ring; + u32 cs[1024]; +}; + +static int measure_breadcrumb_sz(struct intel_engine_cs *engine) +{ + struct measure_breadcrumb *frame; + unsigned int dw; + + GEM_BUG_ON(!engine->i915->gt.scratch); + + frame = kzalloc(sizeof(*frame), GFP_KERNEL); + if (!frame) + return -ENOMEM; + + i915_timeline_init(engine->i915, &frame->timeline, "measure"); + + INIT_LIST_HEAD(&frame->ring.request_list); + frame->ring.timeline = &frame->timeline; + frame->ring.vaddr = frame->cs; + frame->ring.size = sizeof(frame->cs); + frame->ring.effective_size = frame->ring.size; + intel_ring_update_space(&frame->ring); + + frame->rq.i915 = engine->i915; + frame->rq.engine = engine; + frame->rq.ring = &frame->ring; + frame->rq.timeline = &frame->timeline; + + dw = engine->emit_breadcrumb(&frame->rq, frame->cs) - frame->cs; + GEM_BUG_ON(dw != engine->emit_breadcrumb_sz); + + i915_timeline_fini(&frame->timeline); + kfree(frame); + + return dw; +} + /** * intel_engines_init_common - initialize cengine state which might require hw access * @engine: Engine to initialize. @@ -657,8 +698,16 @@ int intel_engine_init_common(struct intel_engine_cs *engine) if (ret) goto err_breadcrumbs; + ret = measure_breadcrumb_sz(engine); + if (ret < 0) + goto err_status_page; + + engine->emit_breadcrumb_sz = ret; + return 0; +err_status_page: + cleanup_status_page(engine); err_breadcrumbs: intel_engine_fini_breadcrumbs(engine); err_unpin_preempt: diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9155cc675924..d2299425cf2f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2051,15 +2051,17 @@ static int gen8_emit_flush_render(struct i915_request *request, * used as a workaround for not being allowed to do lite * restore with HEAD==TAIL (WaIdleLiteRestore). */ -static void gen8_emit_wa_tail(struct i915_request *request, u32 *cs) +static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs) { /* Ensure there's always at least one preemption point per-request. 
*/ *cs++ = MI_ARB_CHECK; *cs++ = MI_NOOP; request->wa_tail = intel_ring_offset(request, cs); + + return cs; } -static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) +static u32 *gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) { /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); @@ -2071,11 +2073,11 @@ static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) request->tail = intel_ring_offset(request, cs); assert_ring_tail_valid(request->ring, request->tail); - gen8_emit_wa_tail(request, cs); + return gen8_emit_wa_tail(request, cs); } static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; -static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) +static u32 *gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) { /* We're using qword write, seqno should be aligned to 8 bytes. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); @@ -2095,7 +2097,7 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) request->tail = intel_ring_offset(request, cs); assert_ring_tail_valid(request->ring, request->tail); - gen8_emit_wa_tail(request, cs); + return gen8_emit_wa_tail(request, cs); } static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e39e483d8d16..107c4934e2fa 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -299,7 +299,7 @@ gen6_render_ring_flush(struct i915_request *rq, u32 mode) return 0; } -static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { /* First we do the gen6_emit_post_sync_nonzero_flush w/a */ *cs++ = GFX_OP_PIPE_CONTROL(4); @@ -327,6 +327,8 @@ static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; } static const int gen6_rcs_emit_breadcrumb_sz = 14; @@ -409,7 +411,7 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode) return 0; } -static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { *cs++ = GFX_OP_PIPE_CONTROL(4); *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | @@ -427,10 +429,12 @@ static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; } static const int gen7_rcs_emit_breadcrumb_sz = 6; -static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW; *cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT; @@ -439,11 +443,13 @@ static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; } static const int gen6_xcs_emit_breadcrumb_sz = 4; #define GEN7_XCS_WA 32 -static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { int i; @@ -466,6 +472,8 @@ static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; } static const int 
gen7_xcs_emit_breadcrumb_sz = 8 + GEN7_XCS_WA * 3; #undef GEN7_XCS_WA @@ -861,7 +869,7 @@ static void i9xx_submit_request(struct i915_request *request) intel_ring_set_tail(request->ring, request->tail)); } -static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) { *cs++ = MI_FLUSH; @@ -874,11 +882,13 @@ static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; } static const int i9xx_emit_breadcrumb_sz = 6; #define GEN5_WA_STORES 8 /* must be at least 1! */ -static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) { int i; @@ -895,6 +905,8 @@ static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; } static const int gen5_emit_breadcrumb_sz = GEN5_WA_STORES * 3 + 2; #undef GEN5_WA_STORES static void diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index c3ef0f9bf321..479bd53d4ac6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -470,7 +470,7 @@ struct intel_engine_cs { unsigned int dispatch_flags); #define I915_DISPATCH_SECURE BIT(0) #define I915_DISPATCH_PINNED BIT(1) - void (*emit_breadcrumb)(struct i915_request *rq, u32 *cs); + u32 *(*emit_breadcrumb)(struct i915_request *rq, u32 *cs); int emit_breadcrumb_sz; /* Pass the request to the hardware queue (e.g. directly into diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 442ec2aeec81..905318b7ae18 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -159,9 +159,9 @@ static int mock_emit_flush(struct i915_request *request, return 0; } -static void mock_emit_breadcrumb(struct i915_request *request, - u32 *flags) +static u32 *mock_emit_breadcrumb(struct i915_request *request, u32 *cs) { + return cs; } static void mock_submit_request(struct i915_request *request) From 9fa4973e91be3e5cb220f7d607c21bf6e82c52d1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 25 Jan 2019 12:00:04 +0000 Subject: [PATCH 10/89] drm/i915: Remove manual breadcrumb counting Now that we know we measure the size of engine->emit_breadcrumb() correctly, we can remove the previous manual counting. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190125120005.25191-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 4 ++-- drivers/gpu/drm/i915/intel_engine_cs.c | 7 +++---- drivers/gpu/drm/i915/intel_lrc.c | 4 ---- drivers/gpu/drm/i915/intel_ringbuffer.c | 28 +++++-------------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- 5 files changed, 11 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index f941e40fd373..ddc35e9dc0c0 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -650,7 +650,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) * around inside i915_request_add() there is sufficient space at * the beginning of the ring as well. 
*/ - rq->reserved_space = 2 * engine->emit_breadcrumb_sz * sizeof(u32); + rq->reserved_space = 2 * engine->emit_breadcrumb_dw * sizeof(u32); /* * Record the position of the start of the request so that @@ -901,7 +901,7 @@ void i915_request_add(struct i915_request *request) * GPU processing the request, we never over-estimate the * position of the ring's HEAD. */ - cs = intel_ring_begin(request, engine->emit_breadcrumb_sz); + cs = intel_ring_begin(request, engine->emit_breadcrumb_dw); GEM_BUG_ON(IS_ERR(cs)); request->postfix = intel_ring_offset(request, cs); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 8f738a7cd117..ef4c8c50a4ba 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -611,7 +611,7 @@ struct measure_breadcrumb { u32 cs[1024]; }; -static int measure_breadcrumb_sz(struct intel_engine_cs *engine) +static int measure_breadcrumb_dw(struct intel_engine_cs *engine) { struct measure_breadcrumb *frame; unsigned int dw; @@ -637,7 +637,6 @@ static int measure_breadcrumb_sz(struct intel_engine_cs *engine) frame->rq.timeline = &frame->timeline; dw = engine->emit_breadcrumb(&frame->rq, frame->cs) - frame->cs; - GEM_BUG_ON(dw != engine->emit_breadcrumb_sz); i915_timeline_fini(&frame->timeline); kfree(frame); @@ -698,11 +697,11 @@ int intel_engine_init_common(struct intel_engine_cs *engine) if (ret) goto err_breadcrumbs; - ret = measure_breadcrumb_sz(engine); + ret = measure_breadcrumb_dw(engine); if (ret < 0) goto err_status_page; - engine->emit_breadcrumb_sz = ret; + engine->emit_breadcrumb_dw = ret; return 0; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d2299425cf2f..5551dd2ec0e6 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2075,7 +2075,6 @@ static u32 *gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) return gen8_emit_wa_tail(request, cs); } -static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; static u32 *gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) { @@ -2099,7 +2098,6 @@ static u32 *gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) return gen8_emit_wa_tail(request, cs); } -static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS; static int gen8_init_rcs_context(struct i915_request *rq) { @@ -2192,7 +2190,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->emit_flush = gen8_emit_flush; engine->emit_breadcrumb = gen8_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz; engine->set_default_submission = intel_execlists_set_default_submission; @@ -2298,7 +2295,6 @@ int logical_render_ring_init(struct intel_engine_cs *engine) engine->init_context = gen8_init_rcs_context; engine->emit_flush = gen8_emit_flush_render; engine->emit_breadcrumb = gen8_emit_breadcrumb_rcs; - engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_rcs_sz; ret = logical_ring_init(engine); if (ret) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 107c4934e2fa..09c90475168a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -330,7 +330,6 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) return cs; } -static const int gen6_rcs_emit_breadcrumb_sz = 14; static int gen7_render_ring_cs_stall_wa(struct i915_request *rq) @@ -432,7 +431,6 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) return cs; } 
-static const int gen7_rcs_emit_breadcrumb_sz = 6; static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { @@ -446,7 +444,6 @@ static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) return cs; } -static const int gen6_xcs_emit_breadcrumb_sz = 4; #define GEN7_XCS_WA 32 static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) @@ -475,7 +472,6 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) return cs; } -static const int gen7_xcs_emit_breadcrumb_sz = 8 + GEN7_XCS_WA * 3; #undef GEN7_XCS_WA static void set_hwstam(struct intel_engine_cs *engine, u32 mask) @@ -885,7 +881,6 @@ static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) return cs; } -static const int i9xx_emit_breadcrumb_sz = 6; #define GEN5_WA_STORES 8 /* must be at least 1! */ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) @@ -908,7 +903,6 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) return cs; } -static const int gen5_emit_breadcrumb_sz = GEN5_WA_STORES * 3 + 2; #undef GEN5_WA_STORES static void @@ -2206,11 +2200,8 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->request_alloc = ring_request_alloc; engine->emit_breadcrumb = i9xx_emit_breadcrumb; - engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz; - if (IS_GEN(dev_priv, 5)) { + if (IS_GEN(dev_priv, 5)) engine->emit_breadcrumb = gen5_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen5_emit_breadcrumb_sz; - } engine->set_default_submission = i9xx_set_default_submission; @@ -2240,12 +2231,10 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) engine->init_context = intel_rcs_ctx_init; engine->emit_flush = gen7_render_ring_flush; engine->emit_breadcrumb = gen7_rcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen7_rcs_emit_breadcrumb_sz; } else if (IS_GEN(dev_priv, 6)) { engine->init_context = intel_rcs_ctx_init; engine->emit_flush = gen6_render_ring_flush; engine->emit_breadcrumb = gen6_rcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen6_rcs_emit_breadcrumb_sz; } else if (IS_GEN(dev_priv, 5)) { engine->emit_flush = gen4_render_ring_flush; } else { @@ -2281,13 +2270,10 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) engine->emit_flush = gen6_bsd_ring_flush; engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; - if (IS_GEN(dev_priv, 6)) { + if (IS_GEN(dev_priv, 6)) engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz; - } else { + else engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz; - } } else { engine->emit_flush = bsd_ring_flush; if (IS_GEN(dev_priv, 5)) @@ -2310,13 +2296,10 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) engine->emit_flush = gen6_ring_flush; engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; - if (IS_GEN(dev_priv, 6)) { + if (IS_GEN(dev_priv, 6)) engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz; - } else { + else engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz; - } return intel_init_ring_buffer(engine); } @@ -2335,7 +2318,6 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) engine->irq_disable = hsw_vebox_irq_disable; engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz; return intel_init_ring_buffer(engine); } diff --git 
a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 479bd53d4ac6..0834e91d4ace 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -471,7 +471,7 @@ struct intel_engine_cs { #define I915_DISPATCH_SECURE BIT(0) #define I915_DISPATCH_PINNED BIT(1) u32 *(*emit_breadcrumb)(struct i915_request *rq, u32 *cs); - int emit_breadcrumb_sz; + int emit_breadcrumb_dw; /* Pass the request to the hardware queue (e.g. directly into * the legacy ringbuffer or to the end of an execlist). From 832a67bdb205765109d37fdfff4a97479b03b19b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 25 Jan 2019 12:00:05 +0000 Subject: [PATCH 11/89] drm/i915: Compute the HWS offsets explicitly Simplify by using sizeof(u32) to convert from the index inside the HWSP to the byte offset. This has the advantage of not only being shorter (and so not upsetting checkpatch!) but that it matches use where we are writing to byte addresses using other commands than MI_STORE_DWORD_IMM. v2: Drop the now superfluous MI_STORE_DWORD_INDEX_SHIFT, it appears to be a local invention so keeping it after the final use does not help to clarify the GPU instruction. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190125120005.25191-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_gpu_commands.h | 1 - drivers/gpu/drm/i915/intel_guc_submission.c | 4 ++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 10 +++++----- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/intel_gpu_commands.h index 105e2a9e874a..b96a31bc1080 100644 --- a/drivers/gpu/drm/i915/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/intel_gpu_commands.h @@ -112,7 +112,6 @@ #define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */ #define MI_USE_GGTT (1 << 22) /* g4x+ */ #define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) -#define MI_STORE_DWORD_INDEX_SHIFT 2 /* * Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM: * - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index ab1c49b106f2..349ae5844f24 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -666,7 +666,7 @@ static void complete_preempt_context(struct intel_engine_cs *engine) execlists_unwind_incomplete_requests(execlists); wait_for_guc_preempt_report(engine); - intel_write_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX, 0); + intel_write_status_page(engine, I915_GEM_HWS_PREEMPT, 0); } /** @@ -824,7 +824,7 @@ static void guc_submission_tasklet(unsigned long data) } if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) && - intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) == + intel_read_status_page(engine, I915_GEM_HWS_PREEMPT) == GUC_PREEMPT_FINISHED) complete_preempt_context(engine); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 0834e91d4ace..5ad46c2fbc0f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -716,11 +716,11 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) * The area from dword 0x30 to 0x3ff is available for driver usage. 
*/ #define I915_GEM_HWS_INDEX 0x30 -#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT) -#define I915_GEM_HWS_PREEMPT_INDEX 0x32 -#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT) -#define I915_GEM_HWS_SCRATCH_INDEX 0x40 -#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) +#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX * sizeof(u32)) +#define I915_GEM_HWS_PREEMPT 0x32 +#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32)) +#define I915_GEM_HWS_SCRATCH 0x40 +#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH * sizeof(u32)) #define I915_HWS_CSB_BUF0_INDEX 0x10 #define I915_HWS_CSB_WRITE_INDEX 0x1f From ade8a0f59844349650514f608eb1da5280f7818b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 25 Jan 2019 13:22:26 +0000 Subject: [PATCH 12/89] drm/i915: Make all GPU resets atomic In preparation for the next few commits, make resetting the GPU atomic. Currently, we have prepared gen6+ for atomic resetting of individual engines, but now there is a requirement to perform the whole device level reset (just the register poking) from inside an atomic context. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190125132230.22221-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_reset.c | 90 ++++++++++++++----------------- 1 file changed, 39 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index 342d9ee42601..2f840858572c 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -12,6 +12,8 @@ #include "intel_guc.h" +#define RESET_MAX_RETRIES 3 + static void engine_skip_context(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; @@ -144,14 +146,14 @@ static int i915_do_reset(struct drm_i915_private *i915, /* Assert reset for at least 20 usec, and wait for acknowledgement. */ pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); - usleep_range(50, 200); - err = wait_for(i915_in_reset(pdev), 500); + udelay(50); + err = wait_for_atomic(i915_in_reset(pdev), 50); /* Clear the reset request. 
*/ pci_write_config_byte(pdev, I915_GDRST, 0); - usleep_range(50, 200); + udelay(50); if (!err) - err = wait_for(!i915_in_reset(pdev), 500); + err = wait_for_atomic(!i915_in_reset(pdev), 50); return err; } @@ -171,7 +173,7 @@ static int g33_do_reset(struct drm_i915_private *i915, struct pci_dev *pdev = i915->drm.pdev; pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); - return wait_for(g4x_reset_complete(pdev), 500); + return wait_for_atomic(g4x_reset_complete(pdev), 50); } static int g4x_do_reset(struct drm_i915_private *dev_priv, @@ -182,13 +184,13 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, int ret; /* WaVcpClkGateDisableForMediaReset:ctg,elk */ - I915_WRITE(VDECCLK_GATE_D, - I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); - POSTING_READ(VDECCLK_GATE_D); + I915_WRITE_FW(VDECCLK_GATE_D, + I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); + POSTING_READ_FW(VDECCLK_GATE_D); pci_write_config_byte(pdev, I915_GDRST, GRDOM_MEDIA | GRDOM_RESET_ENABLE); - ret = wait_for(g4x_reset_complete(pdev), 500); + ret = wait_for_atomic(g4x_reset_complete(pdev), 50); if (ret) { DRM_DEBUG_DRIVER("Wait for media reset failed\n"); goto out; @@ -196,7 +198,7 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, pci_write_config_byte(pdev, I915_GDRST, GRDOM_RENDER | GRDOM_RESET_ENABLE); - ret = wait_for(g4x_reset_complete(pdev), 500); + ret = wait_for_atomic(g4x_reset_complete(pdev), 50); if (ret) { DRM_DEBUG_DRIVER("Wait for render reset failed\n"); goto out; @@ -205,9 +207,9 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, out: pci_write_config_byte(pdev, I915_GDRST, 0); - I915_WRITE(VDECCLK_GATE_D, - I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE); - POSTING_READ(VDECCLK_GATE_D); + I915_WRITE_FW(VDECCLK_GATE_D, + I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE); + POSTING_READ_FW(VDECCLK_GATE_D); return ret; } @@ -218,27 +220,29 @@ static int ironlake_do_reset(struct drm_i915_private *dev_priv, { int ret; - I915_WRITE(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); - ret = intel_wait_for_register(dev_priv, - ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, - 500); + I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); + ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR, + ILK_GRDOM_RESET_ENABLE, 0, + 5000, 0, + NULL); if (ret) { DRM_DEBUG_DRIVER("Wait for render reset failed\n"); goto out; } - I915_WRITE(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); - ret = intel_wait_for_register(dev_priv, - ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, - 500); + I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); + ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR, + ILK_GRDOM_RESET_ENABLE, 0, + 5000, 0, + NULL); if (ret) { DRM_DEBUG_DRIVER("Wait for media reset failed\n"); goto out; } out: - I915_WRITE(ILK_GDSR, 0); - POSTING_READ(ILK_GDSR); + I915_WRITE_FW(ILK_GDSR, 0); + POSTING_READ_FW(ILK_GDSR); return ret; } @@ -527,32 +531,21 @@ static reset_func intel_get_gpu_reset(struct drm_i915_private *i915) int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask) { - reset_func reset = intel_get_gpu_reset(i915); + const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1; + reset_func reset; + int ret = -ETIMEDOUT; int retry; - int ret; - /* - * We want to perform per-engine reset from atomic context (e.g. - * softirq), which imposes the constraint that we cannot sleep. 
-	 * However, experience suggests that spending a bit of time waiting
-	 * for a reset helps in various cases, so for a full-device reset
-	 * we apply the opposite rule and wait if we want to. As we should
-	 * always follow up a failed per-engine reset with a full device reset,
-	 * being a little faster, stricter and more error prone for the
-	 * atomic case seems an acceptable compromise.
-	 *
-	 * Unfortunately this leads to a bimodal routine, when the goal was
-	 * to have a single reset function that worked for resetting any
-	 * number of engines simultaneously.
-	 */
-	might_sleep_if(engine_mask == ALL_ENGINES);
+	reset = intel_get_gpu_reset(i915);
+	if (!reset)
+		return -ENODEV;
 
 	/*
 	 * If the power well sleeps during the reset, the reset
 	 * request may be dropped and never completes (causing -EIO).
 	 */
 	intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
-	for (retry = 0; retry < 3; retry++) {
+	for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) {
 		/*
 		 * We stop engines, otherwise we might get failed reset and a
 		 * dead gpu (on elk). Also as modern gpu as kbl can suffer
@@ -569,15 +562,10 @@ int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask)
 		 */
 		i915_stop_engines(i915, engine_mask);
 
-		ret = -ENODEV;
-		if (reset) {
-			GEM_TRACE("engine_mask=%x\n", engine_mask);
-			ret = reset(i915, engine_mask, retry);
-		}
-		if (ret != -ETIMEDOUT || engine_mask != ALL_ENGINES)
-			break;
-
-		cond_resched();
+		GEM_TRACE("engine_mask=%x\n", engine_mask);
+		preempt_disable();
+		ret = reset(i915, engine_mask, retry);
+		preempt_enable();
 	}
 	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
 
@@ -1014,7 +1002,7 @@ void i915_reset(struct drm_i915_private *i915,
 		goto error;
 	}
 
-	for (i = 0; i < 3; i++) {
+	for (i = 0; i < RESET_MAX_RETRIES; i++) {
 		ret = intel_gpu_reset(i915, ALL_ENGINES);
 		if (ret == 0)
 			break;

From fe62365f9f80a1c1d438c54fba21f5108a182de8 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Fri, 25 Jan 2019 13:22:27 +0000
Subject: [PATCH 13/89] drm/i915/guc: Disable global reset

The guc (and huc) currently inextricably depend on struct_mutex for
device reinitialisation from inside the reset, and indeed taking any
mutex here is verboten (as we must be able to reset from underneath any
of our mutexes). That makes recovering the guc unviable without, for
example, reserving contiguous vma space and pages for it to use.

The plan to re-enable global reset for the GuC centres around reusing the
WOPCM reserved space at the top of the aperture (that we know we can
populate a contiguous range large enough to dma xfer the fw image).

In the meantime, hopefully no one even notices as the device-reset is
only used as a backup to the per-engine resets for handling GPU hangs.
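
A minimal standalone sketch of the resulting policy (not kernel code;
toy_i915, toy_has_gpu_reset and their fields are invented stand-ins for
drm_i915_private, USES_GUC() and intel_get_gpu_reset()): with the GuC in
use, full-device reset is reported unavailable and the per-engine resets
remain the only recovery path.

  #include <stdbool.h>
  #include <stdio.h>

  /* Toy stand-in for drm_i915_private; only what the sketch needs. */
  struct toy_i915 {
  	bool uses_guc;          /* models USES_GUC(i915) */
  	bool has_reset_backend; /* models intel_get_gpu_reset() != NULL */
  };

  /* Mirrors the guard this patch adds to intel_has_gpu_reset(). */
  static bool toy_has_gpu_reset(const struct toy_i915 *i915)
  {
  	if (i915->uses_guc)
  		return false; /* global reset disabled while GuC is in use */
  	return i915->has_reset_backend;
  }

  int main(void)
  {
  	struct toy_i915 dev = { .uses_guc = true, .has_reset_backend = true };

  	/* Prints "no": with the GuC loaded, only engine resets remain. */
  	printf("full-device reset available: %s\n",
  	       toy_has_gpu_reset(&dev) ? "yes" : "no");
  	return 0;
  }

The per-engine path is unaffected by this guard since, per the reasoning
above, it does not involve reinitialising the firmware.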
Signed-off-by: Chris Wilson Acked-by: Mika Kuoppala Acked-by: Daniele Ceraolo Spurio Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20190125132230.22221-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_reset.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index 2f840858572c..33408c4e6358 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -574,6 +574,9 @@ int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask) bool intel_has_gpu_reset(struct drm_i915_private *i915) { + if (USES_GUC(i915)) + return false; + return intel_get_gpu_reset(i915); } From eb8d0f5af4ec2d172baf8b4b9a2199cd916b4e54 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 25 Jan 2019 13:22:28 +0000 Subject: [PATCH 14/89] drm/i915: Remove GPU reset dependence on struct_mutex Now that the submission backends are controlled via their own spinlocks, with a wave of a magic wand we can lift the struct_mutex requirement around GPU reset. That is we allow the submission frontend (userspace) to keep on submitting while we process the GPU reset as we can suspend the backend independently. The major change is around the backoff/handoff strategy for performing the reset. With no mutex deadlock, we no longer have to coordinate with any waiter, and just perform the reset immediately. Testcase: igt/gem_mmap_gtt/hang # regresses Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190125132230.22221-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 38 +- drivers/gpu/drm/i915/i915_drv.h | 5 - drivers/gpu/drm/i915/i915_gem.c | 18 +- drivers/gpu/drm/i915/i915_gem_fence_reg.h | 1 - drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + drivers/gpu/drm/i915/i915_gpu_error.c | 104 ++--- drivers/gpu/drm/i915/i915_gpu_error.h | 28 +- drivers/gpu/drm/i915/i915_request.c | 47 -- drivers/gpu/drm/i915/i915_reset.c | 420 ++++++++---------- drivers/gpu/drm/i915/i915_reset.h | 3 + drivers/gpu/drm/i915/intel_engine_cs.c | 6 +- drivers/gpu/drm/i915/intel_guc_submission.c | 5 +- drivers/gpu/drm/i915/intel_hangcheck.c | 28 +- drivers/gpu/drm/i915/intel_lrc.c | 91 ++-- drivers/gpu/drm/i915/intel_overlay.c | 2 - drivers/gpu/drm/i915/intel_ringbuffer.c | 91 ++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 17 +- .../gpu/drm/i915/selftests/intel_hangcheck.c | 57 +-- .../drm/i915/selftests/intel_workarounds.c | 3 - .../gpu/drm/i915/selftests/mock_gem_device.c | 4 +- 20 files changed, 418 insertions(+), 551 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 9a9e1da496dc..76dea0572f3e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1284,8 +1284,6 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) seq_puts(m, "Wedged\n"); if (test_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags)) seq_puts(m, "Reset in progress: struct_mutex backoff\n"); - if (test_bit(I915_RESET_HANDOFF, &dev_priv->gpu_error.flags)) - seq_puts(m, "Reset in progress: reset handoff to waiter\n"); if (waitqueue_active(&dev_priv->gpu_error.wait_queue)) seq_puts(m, "Waiter holding struct mutex\n"); if (waitqueue_active(&dev_priv->gpu_error.reset_queue)) @@ -1321,15 +1319,15 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) struct rb_node *rb; seq_printf(m, "%s:\n", engine->name); - seq_printf(m, "\tseqno = %x [current %x, last 
%x]\n", + seq_printf(m, "\tseqno = %x [current %x, last %x], %dms ago\n", engine->hangcheck.seqno, seqno[id], - intel_engine_last_submit(engine)); - seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? %s, wedged? %s\n", + intel_engine_last_submit(engine), + jiffies_to_msecs(jiffies - + engine->hangcheck.action_timestamp)); + seq_printf(m, "\twaiters? %s, fake irq active? %s\n", yesno(intel_engine_has_waiter(engine)), yesno(test_bit(engine->id, - &dev_priv->gpu_error.missed_irq_rings)), - yesno(engine->hangcheck.stalled), - yesno(engine->hangcheck.wedged)); + &dev_priv->gpu_error.missed_irq_rings))); spin_lock_irq(&b->rb_lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { @@ -1343,11 +1341,6 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n", (long long)engine->hangcheck.acthd, (long long)acthd[id]); - seq_printf(m, "\taction = %s(%d) %d ms ago\n", - hangcheck_action_to_str(engine->hangcheck.action), - engine->hangcheck.action, - jiffies_to_msecs(jiffies - - engine->hangcheck.action_timestamp)); if (engine->id == RCS) { seq_puts(m, "\tinstdone read =\n"); @@ -3911,8 +3904,6 @@ static int i915_wedged_set(void *data, u64 val) { struct drm_i915_private *i915 = data; - struct intel_engine_cs *engine; - unsigned int tmp; /* * There is no safeguard against this debugfs entry colliding @@ -3925,18 +3916,8 @@ i915_wedged_set(void *data, u64 val) if (i915_reset_backoff(&i915->gpu_error)) return -EAGAIN; - for_each_engine_masked(engine, i915, val, tmp) { - engine->hangcheck.seqno = intel_engine_get_seqno(engine); - engine->hangcheck.stalled = true; - } - i915_handle_error(i915, val, I915_ERROR_CAPTURE, "Manually set wedged engine mask = %llx", val); - - wait_on_bit(&i915->gpu_error.flags, - I915_RESET_HANDOFF, - TASK_UNINTERRUPTIBLE); - return 0; } @@ -4091,13 +4072,8 @@ i915_drop_caches_set(void *data, u64 val) mutex_unlock(&i915->drm.struct_mutex); } - if (val & DROP_RESET_ACTIVE && - i915_terminally_wedged(&i915->gpu_error)) { + if (val & DROP_RESET_ACTIVE && i915_terminally_wedged(&i915->gpu_error)) i915_handle_error(i915, ALL_ENGINES, 0, NULL); - wait_on_bit(&i915->gpu_error.flags, - I915_RESET_HANDOFF, - TASK_UNINTERRUPTIBLE); - } fs_reclaim_acquire(GFP_KERNEL); if (val & DROP_BOUND) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3c111ad09922..0133d1da3d3c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3001,11 +3001,6 @@ static inline bool i915_reset_backoff(struct i915_gpu_error *error) return unlikely(test_bit(I915_RESET_BACKOFF, &error->flags)); } -static inline bool i915_reset_handoff(struct i915_gpu_error *error) -{ - return unlikely(test_bit(I915_RESET_HANDOFF, &error->flags)); -} - static inline bool i915_terminally_wedged(struct i915_gpu_error *error) { return unlikely(test_bit(I915_WEDGED, &error->flags)); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b359390ba22c..d20b42386c3c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -657,11 +657,6 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj, struct intel_rps_client *rps_client) { might_sleep(); -#if IS_ENABLED(CONFIG_LOCKDEP) - GEM_BUG_ON(debug_locks && - !!lockdep_is_held(&obj->base.dev->struct_mutex) != - !!(flags & I915_WAIT_LOCKED)); -#endif GEM_BUG_ON(timeout < 0); timeout = i915_gem_object_wait_reservation(obj->resv, @@ -4493,8 +4488,6 @@ void i915_gem_sanitize(struct 
drm_i915_private *i915) GEM_TRACE("\n"); - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(i915); intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); @@ -4520,6 +4513,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915) intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); intel_runtime_pm_put(i915, wakeref); + mutex_lock(&i915->drm.struct_mutex); i915_gem_contexts_lost(i915); mutex_unlock(&i915->drm.struct_mutex); } @@ -4534,6 +4528,8 @@ int i915_gem_suspend(struct drm_i915_private *i915) wakeref = intel_runtime_pm_get(i915); intel_suspend_gt_powersave(i915); + flush_workqueue(i915->wq); + mutex_lock(&i915->drm.struct_mutex); /* @@ -4563,11 +4559,9 @@ int i915_gem_suspend(struct drm_i915_private *i915) i915_retire_requests(i915); /* ensure we flush after wedging */ mutex_unlock(&i915->drm.struct_mutex); + i915_reset_flush(i915); - intel_uc_suspend(i915); - - cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); - cancel_delayed_work_sync(&i915->gt.retire_work); + drain_delayed_work(&i915->gt.retire_work); /* * As the idle_work is rearming if it detects a race, play safe and @@ -4575,6 +4569,8 @@ int i915_gem_suspend(struct drm_i915_private *i915) */ drain_delayed_work(&i915->gt.idle_work); + intel_uc_suspend(i915); + /* * Assert that we successfully flushed all the work and * reset the GPU back to its idle, low power state. diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h index 99a31ded4dfd..09dcaf14121b 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h @@ -50,4 +50,3 @@ struct drm_i915_fence_reg { }; #endif - diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 9229b03d629b..a0039ea97cdc 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -39,6 +39,7 @@ #include #include "i915_request.h" +#include "i915_reset.h" #include "i915_selftest.h" #include "i915_timeline.h" diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 1f8e80e31b49..4eef0462489c 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -533,10 +533,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, err_printf(m, " waiting: %s\n", yesno(ee->waiting)); err_printf(m, " ring->head: 0x%08x\n", ee->cpu_ring_head); err_printf(m, " ring->tail: 0x%08x\n", ee->cpu_ring_tail); - err_printf(m, " hangcheck stall: %s\n", yesno(ee->hangcheck_stalled)); - err_printf(m, " hangcheck action: %s\n", - hangcheck_action_to_str(ee->hangcheck_action)); - err_printf(m, " hangcheck action timestamp: %dms (%lu%s)\n", + err_printf(m, " hangcheck timestamp: %dms (%lu%s)\n", jiffies_to_msecs(ee->hangcheck_timestamp - epoch), ee->hangcheck_timestamp, ee->hangcheck_timestamp == epoch ? 
"; epoch" : ""); @@ -684,15 +681,15 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, jiffies_to_msecs(error->capture - error->epoch)); for (i = 0; i < ARRAY_SIZE(error->engine); i++) { - if (error->engine[i].hangcheck_stalled && - error->engine[i].context.pid) { - err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n", - engine_name(m->i915, i), - error->engine[i].context.comm, - error->engine[i].context.pid, - error->engine[i].context.ban_score, - bannable(&error->engine[i].context)); - } + if (!error->engine[i].context.pid) + continue; + + err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n", + engine_name(m->i915, i), + error->engine[i].context.comm, + error->engine[i].context.pid, + error->engine[i].context.ban_score, + bannable(&error->engine[i].context)); } err_printf(m, "Reset count: %u\n", error->reset_count); err_printf(m, "Suspend count: %u\n", error->suspend_count); @@ -1144,7 +1141,8 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err, return i; } -/* Generate a semi-unique error code. The code is not meant to have meaning, The +/* + * Generate a semi-unique error code. The code is not meant to have meaning, The * code's only purpose is to try to prevent false duplicated bug reports by * grossly estimating a GPU error state. * @@ -1153,29 +1151,23 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err, * * It's only a small step better than a random number in its current form. */ -static u32 i915_error_generate_code(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error, - int *engine_id) +static u32 i915_error_generate_code(struct i915_gpu_state *error, + unsigned long engine_mask) { - u32 error_code = 0; - int i; - - /* IPEHR would be an ideal way to detect errors, as it's the gross + /* + * IPEHR would be an ideal way to detect errors, as it's the gross * measure of "the command that hung." However, has some very common * synchronization commands which almost always appear in the case * strictly a client bug. Use instdone to differentiate those some. 
*/ - for (i = 0; i < I915_NUM_ENGINES; i++) { - if (error->engine[i].hangcheck_stalled) { - if (engine_id) - *engine_id = i; + if (engine_mask) { + struct drm_i915_error_engine *ee = + &error->engine[ffs(engine_mask)]; - return error->engine[i].ipehr ^ - error->engine[i].instdone.instdone; - } + return ee->ipehr ^ ee->instdone.instdone; } - return error_code; + return 0; } static void gem_record_fences(struct i915_gpu_state *error) @@ -1338,9 +1330,8 @@ static void error_record_engine_registers(struct i915_gpu_state *error, } ee->idle = intel_engine_is_idle(engine); - ee->hangcheck_timestamp = engine->hangcheck.action_timestamp; - ee->hangcheck_action = engine->hangcheck.action; - ee->hangcheck_stalled = engine->hangcheck.stalled; + if (!ee->idle) + ee->hangcheck_timestamp = engine->hangcheck.action_timestamp; ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error, engine); @@ -1783,31 +1774,35 @@ static void capture_reg_state(struct i915_gpu_state *error) error->pgtbl_er = I915_READ(PGTBL_ER); } -static void i915_error_capture_msg(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error, - u32 engine_mask, - const char *error_msg) +static const char * +error_msg(struct i915_gpu_state *error, unsigned long engines, const char *msg) { - u32 ecode; - int engine_id = -1, len; + int len; + int i; - ecode = i915_error_generate_code(dev_priv, error, &engine_id); + for (i = 0; i < ARRAY_SIZE(error->engine); i++) + if (!error->engine[i].context.pid) + engines &= ~BIT(i); len = scnprintf(error->error_msg, sizeof(error->error_msg), - "GPU HANG: ecode %d:%d:0x%08x", - INTEL_GEN(dev_priv), engine_id, ecode); - - if (engine_id != -1 && error->engine[engine_id].context.pid) + "GPU HANG: ecode %d:%lx:0x%08x", + INTEL_GEN(error->i915), engines, + i915_error_generate_code(error, engines)); + if (engines) { + /* Just show the first executing process, more is confusing */ + i = ffs(engines); len += scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, ", in %s [%d]", - error->engine[engine_id].context.comm, - error->engine[engine_id].context.pid); + error->engine[i].context.comm, + error->engine[i].context.pid); + } + if (msg) + len += scnprintf(error->error_msg + len, + sizeof(error->error_msg) - len, + ", %s", msg); - scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, - ", reason: %s, action: %s", - error_msg, - engine_mask ? "reset" : "continue"); + return error->error_msg; } static void capture_gen_state(struct i915_gpu_state *error) @@ -1847,7 +1842,7 @@ static unsigned long capture_find_epoch(const struct i915_gpu_state *error) for (i = 0; i < ARRAY_SIZE(error->engine); i++) { const struct drm_i915_error_engine *ee = &error->engine[i]; - if (ee->hangcheck_stalled && + if (ee->hangcheck_timestamp && time_before(ee->hangcheck_timestamp, epoch)) epoch = ee->hangcheck_timestamp; } @@ -1921,7 +1916,7 @@ i915_capture_gpu_state(struct drm_i915_private *i915) * i915_capture_error_state - capture an error record for later analysis * @i915: i915 device * @engine_mask: the mask of engines triggering the hang - * @error_msg: a message to insert into the error capture header + * @msg: a message to insert into the error capture header * * Should be called when an error is detected (either a hang or an error * interrupt) to capture error state from the time of the error. Fills @@ -1929,8 +1924,8 @@ i915_capture_gpu_state(struct drm_i915_private *i915) * to pick up. 
*/ void i915_capture_error_state(struct drm_i915_private *i915, - u32 engine_mask, - const char *error_msg) + unsigned long engine_mask, + const char *msg) { static bool warned; struct i915_gpu_state *error; @@ -1946,8 +1941,7 @@ void i915_capture_error_state(struct drm_i915_private *i915, if (IS_ERR(error)) return; - i915_error_capture_msg(i915, error, engine_mask, error_msg); - DRM_INFO("%s\n", error->error_msg); + dev_info(i915->drm.dev, "%s\n", error_msg(error, engine_mask, msg)); if (!error->simulated) { spin_lock_irqsave(&i915->gpu_error.lock, flags); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 604291f7762d..231173786eae 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -85,8 +85,6 @@ struct i915_gpu_state { bool waiting; int num_waiters; unsigned long hangcheck_timestamp; - bool hangcheck_stalled; - enum intel_engine_hangcheck_action hangcheck_action; struct i915_address_space *vm; int num_requests; u32 reset_count; @@ -197,6 +195,8 @@ struct i915_gpu_state { struct scatterlist *sgl, *fit; }; +struct i915_gpu_restart; + struct i915_gpu_error { /* For hangcheck timer */ #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ @@ -247,15 +247,6 @@ struct i915_gpu_error { * i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a * secondary role in preventing two concurrent global reset attempts. * - * #I915_RESET_HANDOFF - To perform the actual GPU reset, we need the - * struct_mutex. We try to acquire the struct_mutex in the reset worker, - * but it may be held by some long running waiter (that we cannot - * interrupt without causing trouble). Once we are ready to do the GPU - * reset, we set the I915_RESET_HANDOFF bit and wakeup any waiters. If - * they already hold the struct_mutex and want to participate they can - * inspect the bit and do the reset directly, otherwise the worker - * waits for the struct_mutex. - * * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to * acquire the struct_mutex to reset an engine, we need an explicit * flag to prevent two concurrent reset attempts in the same engine. @@ -269,20 +260,13 @@ struct i915_gpu_error { */ unsigned long flags; #define I915_RESET_BACKOFF 0 -#define I915_RESET_HANDOFF 1 -#define I915_RESET_MODESET 2 -#define I915_RESET_ENGINE 3 +#define I915_RESET_MODESET 1 +#define I915_RESET_ENGINE 2 #define I915_WEDGED (BITS_PER_LONG - 1) /** Number of times an engine has been reset */ u32 reset_engine_count[I915_NUM_ENGINES]; - /** Set of stalled engines with guilty requests, in the current reset */ - u32 stalled_mask; - - /** Reason for the current *global* reset */ - const char *reason; - struct mutex wedge_mutex; /* serialises wedging/unwedging */ /** @@ -299,6 +283,8 @@ struct i915_gpu_error { /* For missed irq/seqno simulation. 
*/ unsigned long test_irq_rings; + + struct i915_gpu_restart *restart; }; struct drm_i915_error_state_buf { @@ -320,7 +306,7 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915); void i915_capture_error_state(struct drm_i915_private *dev_priv, - u32 engine_mask, + unsigned long engine_mask, const char *error_msg); static inline struct i915_gpu_state * diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index ddc35e9dc0c0..f4241a17e2ad 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1083,18 +1083,6 @@ static bool __i915_spin_request(const struct i915_request *rq, return false; } -static bool __i915_wait_request_check_and_reset(struct i915_request *request) -{ - struct i915_gpu_error *error = &request->i915->gpu_error; - - if (likely(!i915_reset_handoff(error))) - return false; - - __set_current_state(TASK_RUNNING); - i915_reset(request->i915, error->stalled_mask, error->reason); - return true; -} - /** * i915_request_wait - wait until execution of request has finished * @rq: the request to wait upon @@ -1120,17 +1108,10 @@ long i915_request_wait(struct i915_request *rq, { const int state = flags & I915_WAIT_INTERRUPTIBLE ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; - wait_queue_head_t *errq = &rq->i915->gpu_error.wait_queue; - DEFINE_WAIT_FUNC(reset, default_wake_function); DEFINE_WAIT_FUNC(exec, default_wake_function); struct intel_wait wait; might_sleep(); -#if IS_ENABLED(CONFIG_LOCKDEP) - GEM_BUG_ON(debug_locks && - !!lockdep_is_held(&rq->i915->drm.struct_mutex) != - !!(flags & I915_WAIT_LOCKED)); -#endif GEM_BUG_ON(timeout < 0); if (i915_request_completed(rq)) @@ -1140,11 +1121,7 @@ long i915_request_wait(struct i915_request *rq, return -ETIME; trace_i915_request_wait_begin(rq, flags); - add_wait_queue(&rq->execute, &exec); - if (flags & I915_WAIT_LOCKED) - add_wait_queue(errq, &reset); - intel_wait_init(&wait); if (flags & I915_WAIT_PRIORITY) i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT); @@ -1155,10 +1132,6 @@ restart: if (intel_wait_update_request(&wait, rq)) break; - if (flags & I915_WAIT_LOCKED && - __i915_wait_request_check_and_reset(rq)) - continue; - if (signal_pending_state(state, current)) { timeout = -ERESTARTSYS; goto complete; @@ -1188,9 +1161,6 @@ restart: */ goto wakeup; - if (flags & I915_WAIT_LOCKED) - __i915_wait_request_check_and_reset(rq); - for (;;) { if (signal_pending_state(state, current)) { timeout = -ERESTARTSYS; @@ -1214,21 +1184,6 @@ wakeup: if (i915_request_completed(rq)) break; - /* - * If the GPU is hung, and we hold the lock, reset the GPU - * and then check for completion. On a full reset, the engine's - * HW seqno will be advanced passed us and we are complete. - * If we do a partial reset, we have to wait for the GPU to - * resume and update the breadcrumb. - * - * If we don't hold the mutex, we can just wait for the worker - * to come along and update the breadcrumb (either directly - * itself, or indirectly by recovering the GPU). 
- */ - if (flags & I915_WAIT_LOCKED && - __i915_wait_request_check_and_reset(rq)) - continue; - /* Only spin if we know the GPU is processing this request */ if (__i915_spin_request(rq, wait.seqno, state, 2)) break; @@ -1242,8 +1197,6 @@ wakeup: intel_engine_remove_wait(rq->engine, &wait); complete: __set_current_state(TASK_RUNNING); - if (flags & I915_WAIT_LOCKED) - remove_wait_queue(errq, &reset); remove_wait_queue(&rq->execute, &exec); trace_i915_request_wait_end(rq); diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index 33408c4e6358..68af017ee548 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -5,6 +5,7 @@ */ #include +#include #include "i915_drv.h" #include "i915_gpu_error.h" @@ -14,27 +15,31 @@ #define RESET_MAX_RETRIES 3 +/* XXX How to handle concurrent GGTT updates using tiling registers? */ +#define RESET_UNDER_STOP_MACHINE 0 + static void engine_skip_context(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; struct i915_gem_context *hung_ctx = rq->gem_context; struct i915_timeline *timeline = rq->timeline; - unsigned long flags; + lockdep_assert_held(&engine->timeline.lock); GEM_BUG_ON(timeline == &engine->timeline); - spin_lock_irqsave(&engine->timeline.lock, flags); spin_lock(&timeline->lock); - list_for_each_entry_continue(rq, &engine->timeline.requests, link) - if (rq->gem_context == hung_ctx) - i915_request_skip(rq, -EIO); + if (rq->global_seqno) { + list_for_each_entry_continue(rq, + &engine->timeline.requests, link) + if (rq->gem_context == hung_ctx) + i915_request_skip(rq, -EIO); + } list_for_each_entry(rq, &timeline->requests, link) i915_request_skip(rq, -EIO); spin_unlock(&timeline->lock); - spin_unlock_irqrestore(&engine->timeline.lock, flags); } static void client_mark_guilty(struct drm_i915_file_private *file_priv, @@ -61,7 +66,7 @@ static void client_mark_guilty(struct drm_i915_file_private *file_priv, } } -static void context_mark_guilty(struct i915_gem_context *ctx) +static bool context_mark_guilty(struct i915_gem_context *ctx) { unsigned int score; bool banned, bannable; @@ -74,7 +79,7 @@ static void context_mark_guilty(struct i915_gem_context *ctx) /* Cool contexts don't accumulate client ban score */ if (!bannable) - return; + return false; if (banned) { DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n", @@ -85,6 +90,8 @@ static void context_mark_guilty(struct i915_gem_context *ctx) if (!IS_ERR_OR_NULL(ctx->file_priv)) client_mark_guilty(ctx->file_priv, ctx); + + return banned; } static void context_mark_innocent(struct i915_gem_context *ctx) @@ -92,6 +99,21 @@ static void context_mark_innocent(struct i915_gem_context *ctx) atomic_inc(&ctx->active_count); } +void i915_reset_request(struct i915_request *rq, bool guilty) +{ + lockdep_assert_held(&rq->engine->timeline.lock); + GEM_BUG_ON(i915_request_completed(rq)); + + if (guilty) { + i915_request_skip(rq, -EIO); + if (context_mark_guilty(rq->gem_context)) + engine_skip_context(rq); + } else { + dma_fence_set_error(&rq->fence, -EAGAIN); + context_mark_innocent(rq->gem_context); + } +} + static void gen3_stop_engine(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -604,11 +626,8 @@ int intel_reset_guc(struct drm_i915_private *i915) * Ensure irq handler finishes, and not run again. * Also return the active request so that we only search for it once. 
*/ -static struct i915_request * -reset_prepare_engine(struct intel_engine_cs *engine) +static void reset_prepare_engine(struct intel_engine_cs *engine) { - struct i915_request *rq; - /* * During the reset sequence, we must prevent the engine from * entering RC6. As the context state is undefined until we restart @@ -617,174 +636,116 @@ reset_prepare_engine(struct intel_engine_cs *engine) * GPU state upon resume, i.e. fail to restart after a reset. */ intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); - - rq = engine->reset.prepare(engine); - if (rq && rq->fence.error == -EIO) - rq = ERR_PTR(-EIO); /* Previous reset failed! */ - - return rq; + engine->reset.prepare(engine); } -static int reset_prepare(struct drm_i915_private *i915) +static void reset_prepare(struct drm_i915_private *i915) { struct intel_engine_cs *engine; - struct i915_request *rq; enum intel_engine_id id; - int err = 0; - for_each_engine(engine, i915, id) { - rq = reset_prepare_engine(engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - continue; - } + for_each_engine(engine, i915, id) + reset_prepare_engine(engine); - engine->hangcheck.active_request = rq; - } - - i915_gem_revoke_fences(i915); intel_uc_sanitize(i915); - - return err; } -/* Returns the request if it was guilty of the hang */ -static struct i915_request * -reset_request(struct intel_engine_cs *engine, - struct i915_request *rq, - bool stalled) -{ - /* - * The guilty request will get skipped on a hung engine. - * - * Users of client default contexts do not rely on logical - * state preserved between batches so it is safe to execute - * queued requests following the hang. Non default contexts - * rely on preserved state, so skipping a batch loses the - * evolution of the state and it needs to be considered corrupted. - * Executing more queued batches on top of corrupted state is - * risky. But we take the risk by trying to advance through - * the queued requests in order to make the client behaviour - * more predictable around resets, by not throwing away random - * amount of batches it has prepared for execution. Sophisticated - * clients can use gem_reset_stats_ioctl and dma fence status - * (exported via sync_file info ioctl on explicit fences) to observe - * when it loses the context state and should rebuild accordingly. - * - * The context ban, and ultimately the client ban, mechanism are safety - * valves if client submission ends up resulting in nothing more than - * subsequent hangs. - */ - - if (i915_request_completed(rq)) { - GEM_TRACE("%s pardoned global=%d (fence %llx:%lld), current %d\n", - engine->name, rq->global_seqno, - rq->fence.context, rq->fence.seqno, - intel_engine_get_seqno(engine)); - stalled = false; - } - - if (stalled) { - context_mark_guilty(rq->gem_context); - i915_request_skip(rq, -EIO); - - /* If this context is now banned, skip all pending requests. */ - if (i915_gem_context_is_banned(rq->gem_context)) - engine_skip_context(rq); - } else { - /* - * Since this is not the hung engine, it may have advanced - * since the hang declaration. Double check by refinding - * the active request at the time of the reset. 
-	 */
-		rq = i915_gem_find_active_request(engine);
-		if (rq) {
-			unsigned long flags;
-
-			context_mark_innocent(rq->gem_context);
-			dma_fence_set_error(&rq->fence, -EAGAIN);
-
-			/* Rewind the engine to replay the incomplete rq */
-			spin_lock_irqsave(&engine->timeline.lock, flags);
-			rq = list_prev_entry(rq, link);
-			if (&rq->link == &engine->timeline.requests)
-				rq = NULL;
-			spin_unlock_irqrestore(&engine->timeline.lock, flags);
-		}
-	}
-
-	return rq;
-}
-
-static void reset_engine(struct intel_engine_cs *engine,
-			 struct i915_request *rq,
-			 bool stalled)
-{
-	if (rq)
-		rq = reset_request(engine, rq, stalled);
-
-	/* Setup the CS to resume from the breadcrumb of the hung request */
-	engine->reset.reset(engine, rq);
-}
-
-static void gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
+static int gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	int err;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
+	/*
+	 * Everything depends on having the GTT running, so we need to start
+	 * there.
+	 */
+	err = i915_ggtt_enable_hw(i915);
+	if (err)
+		return err;
 
-	i915_retire_requests(i915);
-
-	for_each_engine(engine, i915, id) {
-		struct intel_context *ce;
-
-		reset_engine(engine,
-			     engine->hangcheck.active_request,
-			     stalled_mask & ENGINE_MASK(id));
-		ce = fetch_and_zero(&engine->last_retired_context);
-		if (ce)
-			intel_context_unpin(ce);
-
-		/*
-		 * Ostensibily, we always want a context loaded for powersaving,
-		 * so if the engine is idle after the reset, send a request
-		 * to load our scratch kernel_context.
-		 *
-		 * More mysteriously, if we leave the engine idle after a reset,
-		 * the next userspace batch may hang, with what appears to be
-		 * an incoherent read by the CS (presumably stale TLB). An
-		 * empty request appears sufficient to paper over the glitch.
-		 */
-		if (intel_engine_is_idle(engine)) {
-			struct i915_request *rq;
-
-			rq = i915_request_alloc(engine, i915->kernel_context);
-			if (!IS_ERR(rq))
-				i915_request_add(rq);
-		}
-	}
+	for_each_engine(engine, i915, id)
+		intel_engine_reset(engine, stalled_mask & ENGINE_MASK(id));
 
 	i915_gem_restore_fences(i915);
+
+	return err;
 }
 
 static void reset_finish_engine(struct intel_engine_cs *engine)
 {
 	engine->reset.finish(engine);
-
 	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
 }
 
+struct i915_gpu_restart {
+	struct work_struct work;
+	struct drm_i915_private *i915;
+};
+
+static void restart_work(struct work_struct *work)
+{
+	struct i915_gpu_restart *arg = container_of(work, typeof(*arg), work);
+	struct drm_i915_private *i915 = arg->i915;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
+
+	wakeref = intel_runtime_pm_get(i915);
+	mutex_lock(&i915->drm.struct_mutex);
+	WRITE_ONCE(i915->gpu_error.restart, NULL);
+
+	for_each_engine(engine, i915, id) {
+		struct i915_request *rq;
+
+		/*
+		 * Ostensibly, we always want a context loaded for powersaving,
+		 * so if the engine is idle after the reset, send a request
+		 * to load our scratch kernel_context.
+		 */
+		if (!intel_engine_is_idle(engine))
+			continue;
+
+		rq = i915_request_alloc(engine, i915->kernel_context);
+		if (!IS_ERR(rq))
+			i915_request_add(rq);
+	}
+
+	mutex_unlock(&i915->drm.struct_mutex);
+	intel_runtime_pm_put(i915, wakeref);
+
+	kfree(arg);
+}
+
 static void reset_finish(struct drm_i915_private *i915)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	for_each_engine(engine, i915, id) {
-		engine->hangcheck.active_request = NULL;
+	for_each_engine(engine, i915, id)
 		reset_finish_engine(engine);
+}
+
+static void reset_restart(struct drm_i915_private *i915)
+{
+	struct i915_gpu_restart *arg;
+
+	/*
+	 * Following the reset, ensure that we always reload context for
+	 * powersaving, and to correct engine->last_retired_context. Since
+	 * this requires us to submit a request, queue a worker to do that
+	 * task for us to evade any locking here.
+	 */
+	if (READ_ONCE(i915->gpu_error.restart))
+		return;
+
+	arg = kmalloc(sizeof(*arg), GFP_KERNEL);
+	if (arg) {
+		arg->i915 = i915;
+		INIT_WORK(&arg->work, restart_work);
+
+		WRITE_ONCE(i915->gpu_error.restart, arg);
+		queue_work(i915->wq, &arg->work);
 	}
 }
@@ -873,8 +834,6 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 	struct i915_timeline *tl;
 	bool ret = false;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
 	if (!test_bit(I915_WEDGED, &error->flags))
 		return true;
@@ -897,9 +856,9 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 	 */
 	list_for_each_entry(tl, &i915->gt.timelines, link) {
 		struct i915_request *rq;
+		long timeout;
 
-		rq = i915_gem_active_peek(&tl->last_request,
-					  &i915->drm.struct_mutex);
+		rq = i915_gem_active_get_unlocked(&tl->last_request);
 		if (!rq)
 			continue;
@@ -914,12 +873,12 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 		 * and when the seqno passes the fence, the signaler
 		 * then signals the fence waking us up).
 		 */
-		if (dma_fence_default_wait(&rq->fence, true,
-					   MAX_SCHEDULE_TIMEOUT) < 0)
+		timeout = dma_fence_default_wait(&rq->fence, true,
+						 MAX_SCHEDULE_TIMEOUT);
+		i915_request_put(rq);
+		if (timeout < 0)
 			goto unlock;
 	}
-	i915_retire_requests(i915);
-
 	GEM_BUG_ON(i915->gt.active_requests);
 	intel_engines_sanitize(i915, false);
@@ -933,7 +892,6 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 	 * context and do not require stop_machine().
 	 */
 	intel_engines_reset_default_submission(i915);
-	i915_gem_contexts_lost(i915);
 
 	GEM_TRACE("end\n");
@@ -946,6 +904,52 @@ unlock:
 	return ret;
 }
 
+struct __i915_reset {
+	struct drm_i915_private *i915;
+	unsigned int stalled_mask;
+};
+
+static int __i915_reset__BKL(void *data)
+{
+	struct __i915_reset *arg = data;
+	int err;
+
+	err = intel_gpu_reset(arg->i915, ALL_ENGINES);
+	if (err)
+		return err;
+
+	return gt_reset(arg->i915, arg->stalled_mask);
+}
+
+#if RESET_UNDER_STOP_MACHINE
+/*
+ * XXX An alternative to using stop_machine would be to park only the
+ * processes that have a GGTT mmap. By remote parking the threads (SIGSTOP)
+ * we should be able to prevent their memory accesses via the lost fence
+ * registers over the course of the reset without the potential recursive
+ * locking of mutexes between the pagefault handler and reset.
+ * + * See igt/gem_mmap_gtt/hang + */ +#define __do_reset(fn, arg) stop_machine(fn, arg, NULL) +#else +#define __do_reset(fn, arg) fn(arg) +#endif + +static int do_reset(struct drm_i915_private *i915, unsigned int stalled_mask) +{ + struct __i915_reset arg = { i915, stalled_mask }; + int err, i; + + err = __do_reset(__i915_reset__BKL, &arg); + for (i = 0; err && i < RESET_MAX_RETRIES; i++) { + msleep(100); + err = __do_reset(__i915_reset__BKL, &arg); + } + + return err; +} + /** * i915_reset - reset chip after a hang * @i915: #drm_i915_private to reset @@ -971,31 +975,22 @@ void i915_reset(struct drm_i915_private *i915, { struct i915_gpu_error *error = &i915->gpu_error; int ret; - int i; GEM_TRACE("flags=%lx\n", error->flags); might_sleep(); - lockdep_assert_held(&i915->drm.struct_mutex); assert_rpm_wakelock_held(i915); GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); - if (!test_bit(I915_RESET_HANDOFF, &error->flags)) - return; - /* Clear any previous failed attempts at recovery. Time to try again. */ if (!i915_gem_unset_wedged(i915)) - goto wakeup; + return; if (reason) dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason); error->reset_count++; - ret = reset_prepare(i915); - if (ret) { - dev_err(i915->drm.dev, "GPU recovery failed\n"); - goto taint; - } + reset_prepare(i915); if (!intel_has_gpu_reset(i915)) { if (i915_modparams.reset) @@ -1005,32 +1000,11 @@ void i915_reset(struct drm_i915_private *i915, goto error; } - for (i = 0; i < RESET_MAX_RETRIES; i++) { - ret = intel_gpu_reset(i915, ALL_ENGINES); - if (ret == 0) - break; - - msleep(100); - } - if (ret) { + if (do_reset(i915, stalled_mask)) { dev_err(i915->drm.dev, "Failed to reset chip\n"); goto taint; } - /* Ok, now get things going again... */ - - /* - * Everything depends on having the GTT running, so we need to start - * there. - */ - ret = i915_ggtt_enable_hw(i915); - if (ret) { - DRM_ERROR("Failed to re-enable GGTT following reset (%d)\n", - ret); - goto error; - } - - gt_reset(i915, stalled_mask); intel_overlay_reset(i915); /* @@ -1052,9 +1026,8 @@ void i915_reset(struct drm_i915_private *i915, finish: reset_finish(i915); -wakeup: - clear_bit(I915_RESET_HANDOFF, &error->flags); - wake_up_bit(&error->flags, I915_RESET_HANDOFF); + if (!i915_terminally_wedged(error)) + reset_restart(i915); return; taint: @@ -1073,7 +1046,6 @@ taint: add_taint(TAINT_WARN, LOCKDEP_STILL_OK); error: i915_gem_set_wedged(i915); - i915_retire_requests(i915); goto finish; } @@ -1099,18 +1071,16 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *i915, int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) { struct i915_gpu_error *error = &engine->i915->gpu_error; - struct i915_request *active_request; int ret; GEM_TRACE("%s flags=%lx\n", engine->name, error->flags); GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); - active_request = reset_prepare_engine(engine); - if (IS_ERR_OR_NULL(active_request)) { - /* Either the previous reset failed, or we pardon the reset. */ - ret = PTR_ERR(active_request); - goto out; - } + if (i915_seqno_passed(intel_engine_get_seqno(engine), + intel_engine_last_submit(engine))) + return 0; + + reset_prepare_engine(engine); if (msg) dev_notice(engine->i915->drm.dev, @@ -1134,7 +1104,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) * active request and can drop it, adjust head to skip the offending * request to resume executing remaining requests in the queue. 
*/ - reset_engine(engine, active_request, true); + intel_engine_reset(engine, true); /* * The engine and its registers (and workarounds in case of render) @@ -1171,30 +1141,7 @@ static void i915_reset_device(struct drm_i915_private *i915, i915_wedge_on_timeout(&w, i915, 5 * HZ) { intel_prepare_reset(i915); - error->reason = reason; - error->stalled_mask = engine_mask; - - /* Signal that locked waiters should reset the GPU */ - smp_mb__before_atomic(); - set_bit(I915_RESET_HANDOFF, &error->flags); - wake_up_all(&error->wait_queue); - - /* - * Wait for anyone holding the lock to wakeup, without - * blocking indefinitely on struct_mutex. - */ - do { - if (mutex_trylock(&i915->drm.struct_mutex)) { - i915_reset(i915, engine_mask, reason); - mutex_unlock(&i915->drm.struct_mutex); - } - } while (wait_on_bit_timeout(&error->flags, - I915_RESET_HANDOFF, - TASK_UNINTERRUPTIBLE, - 1)); - - error->stalled_mask = 0; - error->reason = NULL; + i915_reset(i915, engine_mask, reason); intel_finish_reset(i915); } @@ -1350,6 +1297,25 @@ out: intel_runtime_pm_put(i915, wakeref); } +bool i915_reset_flush(struct drm_i915_private *i915) +{ + int err; + + cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); + + flush_workqueue(i915->wq); + GEM_BUG_ON(READ_ONCE(i915->gpu_error.restart)); + + mutex_lock(&i915->drm.struct_mutex); + err = i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED | + I915_WAIT_FOR_IDLE_BOOST, + MAX_SCHEDULE_TIMEOUT); + mutex_unlock(&i915->drm.struct_mutex); + + return !err; +} + static void i915_wedge_me(struct work_struct *work) { struct i915_wedge_me *w = container_of(work, typeof(*w), work.work); diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/i915_reset.h index b6a519bde67d..f2d347f319df 100644 --- a/drivers/gpu/drm/i915/i915_reset.h +++ b/drivers/gpu/drm/i915/i915_reset.h @@ -29,6 +29,9 @@ void i915_reset(struct drm_i915_private *i915, int i915_reset_engine(struct intel_engine_cs *engine, const char *reason); +void i915_reset_request(struct i915_request *rq, bool guilty); +bool i915_reset_flush(struct drm_i915_private *i915); + bool intel_has_gpu_reset(struct drm_i915_private *i915); bool intel_has_reset_engine(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index ef4c8c50a4ba..1a5c163b98d6 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1119,10 +1119,8 @@ void intel_engines_sanitize(struct drm_i915_private *i915, bool force) if (!reset_engines(i915) && !force) return; - for_each_engine(engine, i915, id) { - if (engine->reset.reset) - engine->reset.reset(engine, NULL); - } + for_each_engine(engine, i915, id) + intel_engine_reset(engine, false); } /** diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 349ae5844f24..45e2db683fe5 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -834,8 +834,7 @@ static void guc_submission_tasklet(unsigned long data) spin_unlock_irqrestore(&engine->timeline.lock, flags); } -static struct i915_request * -guc_reset_prepare(struct intel_engine_cs *engine) +static void guc_reset_prepare(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -861,8 +860,6 @@ guc_reset_prepare(struct intel_engine_cs *engine) */ if (engine->i915->guc.preempt_wq) flush_workqueue(engine->i915->guc.preempt_wq); - - return i915_gem_find_active_request(engine); } /* 
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index 741441daae32..5662d6fed523 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -25,6 +25,17 @@ #include "i915_drv.h" #include "i915_reset.h" +struct hangcheck { + u64 acthd; + u32 seqno; + enum intel_engine_hangcheck_action action; + unsigned long action_timestamp; + int deadlock; + struct intel_instdone instdone; + bool wedged:1; + bool stalled:1; +}; + static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone) { u32 tmp = current_instdone | *old_instdone; @@ -119,25 +130,22 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) } static void hangcheck_load_sample(struct intel_engine_cs *engine, - struct intel_engine_hangcheck *hc) + struct hangcheck *hc) { hc->acthd = intel_engine_get_active_head(engine); hc->seqno = intel_engine_get_seqno(engine); } static void hangcheck_store_sample(struct intel_engine_cs *engine, - const struct intel_engine_hangcheck *hc) + const struct hangcheck *hc) { engine->hangcheck.acthd = hc->acthd; engine->hangcheck.seqno = hc->seqno; - engine->hangcheck.action = hc->action; - engine->hangcheck.stalled = hc->stalled; - engine->hangcheck.wedged = hc->wedged; } static enum intel_engine_hangcheck_action hangcheck_get_action(struct intel_engine_cs *engine, - const struct intel_engine_hangcheck *hc) + const struct hangcheck *hc) { if (engine->hangcheck.seqno != hc->seqno) return ENGINE_ACTIVE_SEQNO; @@ -149,7 +157,7 @@ hangcheck_get_action(struct intel_engine_cs *engine, } static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, - struct intel_engine_hangcheck *hc) + struct hangcheck *hc) { unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT; @@ -265,19 +273,19 @@ static void i915_hangcheck_elapsed(struct work_struct *work) intel_uncore_arm_unclaimed_mmio_detection(dev_priv); for_each_engine(engine, dev_priv, id) { - struct intel_engine_hangcheck hc; + struct hangcheck hc; hangcheck_load_sample(engine, &hc); hangcheck_accumulate_sample(engine, &hc); hangcheck_store_sample(engine, &hc); - if (engine->hangcheck.stalled) { + if (hc.stalled) { hung |= intel_engine_flag(engine); if (hc.action != ENGINE_DEAD) stuck |= intel_engine_flag(engine); } - if (engine->hangcheck.wedged) + if (hc.wedged) wedged |= intel_engine_flag(engine); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 5551dd2ec0e6..185867106c14 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -136,6 +136,7 @@ #include #include "i915_drv.h" #include "i915_gem_render_state.h" +#include "i915_reset.h" #include "i915_vgpu.h" #include "intel_lrc_reg.h" #include "intel_mocs.h" @@ -264,7 +265,8 @@ static void unwind_wa_tail(struct i915_request *rq) assert_ring_tail_valid(rq->ring, rq->tail); } -static void __unwind_incomplete_requests(struct intel_engine_cs *engine) +static struct i915_request * +__unwind_incomplete_requests(struct intel_engine_cs *engine) { struct i915_request *rq, *rn, *active = NULL; struct list_head *uninitialized_var(pl); @@ -306,6 +308,8 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) list_move_tail(&active->sched.link, i915_sched_lookup_priolist(engine, prio)); } + + return active; } void @@ -1732,11 +1736,9 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) return 0; } -static struct i915_request * -execlists_reset_prepare(struct intel_engine_cs *engine) +static void execlists_reset_prepare(struct 
intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; - struct i915_request *request, *active; unsigned long flags; GEM_TRACE("%s: depth<-%d\n", engine->name, @@ -1752,59 +1754,21 @@ execlists_reset_prepare(struct intel_engine_cs *engine) * prevents the race. */ __tasklet_disable_sync_once(&execlists->tasklet); + GEM_BUG_ON(!reset_in_progress(execlists)); + /* And flush any current direct submission. */ spin_lock_irqsave(&engine->timeline.lock, flags); - - /* - * We want to flush the pending context switches, having disabled - * the tasklet above, we can assume exclusive access to the execlists. - * For this allows us to catch up with an inflight preemption event, - * and avoid blaming an innocent request if the stall was due to the - * preemption itself. - */ - process_csb(engine); - - /* - * The last active request can then be no later than the last request - * now in ELSP[0]. So search backwards from there, so that if the GPU - * has advanced beyond the last CSB update, it will be pardoned. - */ - active = NULL; - request = port_request(execlists->port); - if (request) { - /* - * Prevent the breadcrumb from advancing before we decide - * which request is currently active. - */ - intel_engine_stop_cs(engine); - - list_for_each_entry_from_reverse(request, - &engine->timeline.requests, - link) { - if (__i915_request_completed(request, - request->global_seqno)) - break; - - active = request; - } - } - + process_csb(engine); /* drain preemption events */ spin_unlock_irqrestore(&engine->timeline.lock, flags); - - return active; } -static void execlists_reset(struct intel_engine_cs *engine, - struct i915_request *request) +static void execlists_reset(struct intel_engine_cs *engine, bool stalled) { struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_request *rq; unsigned long flags; u32 *regs; - GEM_TRACE("%s request global=%d, current=%d\n", - engine->name, request ? request->global_seqno : 0, - intel_engine_get_seqno(engine)); - spin_lock_irqsave(&engine->timeline.lock, flags); /* @@ -1819,12 +1783,18 @@ static void execlists_reset(struct intel_engine_cs *engine, execlists_cancel_port_requests(execlists); /* Push back any incomplete requests for replay after the reset. */ - __unwind_incomplete_requests(engine); + rq = __unwind_incomplete_requests(engine); /* Following the reset, we need to reload the CSB read/write pointers */ reset_csb_pointers(&engine->execlists); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n", + engine->name, + rq ? rq->global_seqno : 0, + intel_engine_get_seqno(engine), + yesno(stalled)); + if (!rq) + goto out_unlock; /* * If the request was innocent, we leave the request in the ELSP @@ -1837,8 +1807,9 @@ static void execlists_reset(struct intel_engine_cs *engine, * and have to at least restore the RING register in the context * image back to the expected values to skip over the guilty request. */ - if (!request || request->fence.error != -EIO) - return; + i915_reset_request(rq, stalled); + if (!stalled) + goto out_unlock; /* * We want a simple context + ring to execute the breadcrumb update. @@ -1848,7 +1819,7 @@ static void execlists_reset(struct intel_engine_cs *engine, * future request will be after userspace has had the opportunity * to recreate its own state. 
*/ - regs = request->hw_context->lrc_reg_state; + regs = rq->hw_context->lrc_reg_state; if (engine->pinned_default_state) { memcpy(regs, /* skip restoring the vanilla PPHWSP */ engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, @@ -1856,17 +1827,14 @@ static void execlists_reset(struct intel_engine_cs *engine, } /* Move the RING_HEAD onto the breadcrumb, past the hanging batch */ - request->ring->head = intel_ring_wrap(request->ring, request->postfix); + rq->ring->head = intel_ring_wrap(rq->ring, rq->postfix); + intel_ring_update_space(rq->ring); - execlists_init_reg_state(regs, request->gem_context, engine, - request->ring); + execlists_init_reg_state(regs, rq->gem_context, engine, rq->ring); + __execlists_update_reg_state(engine, rq->hw_context); - __execlists_update_reg_state(engine, request->hw_context); - - intel_ring_update_space(request->ring); - - /* Reset WaIdleLiteRestore:bdw,skl as well */ - unwind_wa_tail(request); +out_unlock: + spin_unlock_irqrestore(&engine->timeline.lock, flags); } static void execlists_reset_finish(struct intel_engine_cs *engine) @@ -1879,6 +1847,7 @@ static void execlists_reset_finish(struct intel_engine_cs *engine) * to sleep before we restart and reload a context. * */ + GEM_BUG_ON(!reset_in_progress(execlists)); if (!RB_EMPTY_ROOT(&execlists->queue.rb_root)) execlists->tasklet.func(execlists->tasklet.data); diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index c81db81e4416..f68c7975006c 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -478,8 +478,6 @@ void intel_overlay_reset(struct drm_i915_private *dev_priv) if (!overlay) return; - intel_overlay_release_old_vid(overlay); - overlay->old_xscale = 0; overlay->old_yscale = 0; overlay->crtc = NULL; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 09c90475168a..a9efc8c71254 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -33,6 +33,7 @@ #include "i915_drv.h" #include "i915_gem_render_state.h" +#include "i915_reset.h" #include "i915_trace.h" #include "intel_drv.h" #include "intel_workarounds.h" @@ -711,52 +712,80 @@ out: return ret; } -static struct i915_request *reset_prepare(struct intel_engine_cs *engine) +static void reset_prepare(struct intel_engine_cs *engine) { intel_engine_stop_cs(engine); - return i915_gem_find_active_request(engine); } -static void skip_request(struct i915_request *rq) +static void reset_ring(struct intel_engine_cs *engine, bool stalled) { - void *vaddr = rq->ring->vaddr; + struct i915_timeline *tl = &engine->timeline; + struct i915_request *pos, *rq; + unsigned long flags; u32 head; - head = rq->infix; - if (rq->postfix < head) { - memset32(vaddr + head, MI_NOOP, - (rq->ring->size - head) / sizeof(u32)); - head = 0; + rq = NULL; + spin_lock_irqsave(&tl->lock, flags); + list_for_each_entry(pos, &tl->requests, link) { + if (!__i915_request_completed(pos, pos->global_seqno)) { + rq = pos; + break; + } } - memset32(vaddr + head, MI_NOOP, (rq->postfix - head) / sizeof(u32)); -} - -static void reset_ring(struct intel_engine_cs *engine, struct i915_request *rq) -{ - GEM_TRACE("%s request global=%d, current=%d\n", - engine->name, rq ? rq->global_seqno : 0, - intel_engine_get_seqno(engine)); + GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n", + engine->name, + rq ? 
rq->global_seqno : 0, + intel_engine_get_seqno(engine), + yesno(stalled)); /* - * Try to restore the logical GPU state to match the continuation - * of the request queue. If we skip the context/PD restore, then - * the next request may try to execute assuming that its context - * is valid and loaded on the GPU and so may try to access invalid - * memory, prompting repeated GPU hangs. + * The guilty request will get skipped on a hung engine. * - * If the request was guilty, we still restore the logical state - * in case the next request requires it (e.g. the aliasing ppgtt), - * but skip over the hung batch. + * Users of client default contexts do not rely on logical + * state preserved between batches so it is safe to execute + * queued requests following the hang. Non default contexts + * rely on preserved state, so skipping a batch loses the + * evolution of the state and it needs to be considered corrupted. + * Executing more queued batches on top of corrupted state is + * risky. But we take the risk by trying to advance through + * the queued requests in order to make the client behaviour + * more predictable around resets, by not throwing away random + * amount of batches it has prepared for execution. Sophisticated + * clients can use gem_reset_stats_ioctl and dma fence status + * (exported via sync_file info ioctl on explicit fences) to observe + * when it loses the context state and should rebuild accordingly. * - * If the request was innocent, we try to replay the request with - * the restored context. + * The context ban, and ultimately the client ban, mechanism are safety + * valves if client submission ends up resulting in nothing more than + * subsequent hangs. */ + if (rq) { - /* If the rq hung, jump to its breadcrumb and skip the batch */ - rq->ring->head = intel_ring_wrap(rq->ring, rq->head); - if (rq->fence.error == -EIO) - skip_request(rq); + /* + * Try to restore the logical GPU state to match the + * continuation of the request queue. If we skip the + * context/PD restore, then the next request may try to execute + * assuming that its context is valid and loaded on the GPU and + * so may try to access invalid memory, prompting repeated GPU + * hangs. + * + * If the request was guilty, we still restore the logical + * state in case the next request requires it (e.g. the + * aliasing ppgtt), but skip over the hung batch. + * + * If the request was innocent, we try to replay the request + * with the restored context. 
+ */ + i915_reset_request(rq, stalled); + + GEM_BUG_ON(rq->ring != engine->buffer); + head = rq->head; + } else { + head = engine->buffer->tail; } + engine->buffer->head = intel_ring_wrap(engine->buffer, head); + + spin_unlock_irqrestore(&tl->lock, flags); } static void reset_finish(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 5ad46c2fbc0f..f2effd001540 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -120,13 +120,8 @@ struct intel_instdone { struct intel_engine_hangcheck { u64 acthd; u32 seqno; - enum intel_engine_hangcheck_action action; unsigned long action_timestamp; - int deadlock; struct intel_instdone instdone; - struct i915_request *active_request; - bool stalled:1; - bool wedged:1; }; struct intel_ring { @@ -444,9 +439,8 @@ struct intel_engine_cs { int (*init_hw)(struct intel_engine_cs *engine); struct { - struct i915_request *(*prepare)(struct intel_engine_cs *engine); - void (*reset)(struct intel_engine_cs *engine, - struct i915_request *rq); + void (*prepare)(struct intel_engine_cs *engine); + void (*reset)(struct intel_engine_cs *engine, bool stalled); void (*finish)(struct intel_engine_cs *engine); } reset; @@ -1018,6 +1012,13 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset) return cs; } +static inline void intel_engine_reset(struct intel_engine_cs *engine, + bool stalled) +{ + if (engine->reset.reset) + engine->reset.reset(engine, stalled); +} + void intel_engines_sanitize(struct drm_i915_private *i915, bool force); bool intel_engine_is_idle(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 12550b55c42f..67431355cd6e 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -363,9 +363,7 @@ static int igt_global_reset(void *arg) /* Check that we can issue a global GPU reset */ igt_global_reset_lock(i915); - set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags); - mutex_lock(&i915->drm.struct_mutex); reset_count = i915_reset_count(&i915->gpu_error); i915_reset(i915, ALL_ENGINES, NULL); @@ -374,9 +372,7 @@ static int igt_global_reset(void *arg) pr_err("No GPU reset recorded!\n"); err = -EINVAL; } - mutex_unlock(&i915->drm.struct_mutex); - GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags)); igt_global_reset_unlock(i915); if (i915_terminally_wedged(&i915->gpu_error)) @@ -399,9 +395,7 @@ static int igt_wedged_reset(void *arg) i915_gem_set_wedged(i915); GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error)); - set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags); i915_reset(i915, ALL_ENGINES, NULL); - GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags)); intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); @@ -511,7 +505,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) break; } - if (!wait_for_idle(engine)) { + if (!i915_reset_flush(i915)) { struct drm_printer p = drm_info_printer(i915->drm.dev); @@ -903,20 +897,13 @@ static int igt_reset_engines(void *arg) return 0; } -static u32 fake_hangcheck(struct i915_request *rq, u32 mask) +static u32 fake_hangcheck(struct drm_i915_private *i915, u32 mask) { - struct i915_gpu_error *error = &rq->i915->gpu_error; - u32 reset_count = i915_reset_count(error); + u32 count = i915_reset_count(&i915->gpu_error); - error->stalled_mask = mask; + i915_reset(i915, 
mask, NULL); - /* set_bit() must be after we have setup the backchannel (mask) */ - smp_mb__before_atomic(); - set_bit(I915_RESET_HANDOFF, &error->flags); - - wake_up_all(&error->wait_queue); - - return reset_count; + return count; } static int igt_reset_wait(void *arg) @@ -962,7 +949,7 @@ static int igt_reset_wait(void *arg) goto out_rq; } - reset_count = fake_hangcheck(rq, ALL_ENGINES); + reset_count = fake_hangcheck(i915, ALL_ENGINES); timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10); if (timeout < 0) { @@ -972,7 +959,6 @@ static int igt_reset_wait(void *arg) goto out_rq; } - GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags)); if (i915_reset_count(&i915->gpu_error) == reset_count) { pr_err("No GPU reset recorded!\n"); err = -EINVAL; @@ -1162,7 +1148,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, } out_reset: - fake_hangcheck(rq, intel_engine_flag(rq->engine)); + fake_hangcheck(rq->i915, intel_engine_flag(rq->engine)); if (tsk) { struct igt_wedge_me w; @@ -1341,12 +1327,7 @@ static int igt_reset_queue(void *arg) goto fini; } - reset_count = fake_hangcheck(prev, ENGINE_MASK(id)); - - i915_reset(i915, ENGINE_MASK(id), NULL); - - GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, - &i915->gpu_error.flags)); + reset_count = fake_hangcheck(i915, ENGINE_MASK(id)); if (prev->fence.error != -EIO) { pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n", @@ -1565,6 +1546,7 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine, pr_err("%s(%s): Failed to start request %llx, at %x\n", __func__, engine->name, rq->fence.seqno, hws_seqno(&h, rq)); + i915_gem_set_wedged(i915); err = -EIO; } @@ -1588,7 +1570,6 @@ out: static void force_reset(struct drm_i915_private *i915) { i915_gem_set_wedged(i915); - set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags); i915_reset(i915, 0, NULL); } @@ -1618,6 +1599,26 @@ static int igt_atomic_reset(void *arg) if (i915_terminally_wedged(&i915->gpu_error)) goto unlock; + if (intel_has_gpu_reset(i915)) { + const typeof(*phases) *p; + + for (p = phases; p->name; p++) { + GEM_TRACE("intel_gpu_reset under %s\n", p->name); + + p->critical_section_begin(); + err = intel_gpu_reset(i915, ALL_ENGINES); + p->critical_section_end(); + + if (err) { + pr_err("intel_gpu_reset failed under %s\n", + p->name); + goto out; + } + } + + force_reset(i915); + } + if (intel_has_reset_engine(i915)) { struct intel_engine_cs *engine; enum intel_engine_id id; diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c index a8cac56be835..b15c4f26c593 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -214,7 +214,6 @@ out_put: static int do_device_reset(struct intel_engine_cs *engine) { - set_bit(I915_RESET_HANDOFF, &engine->i915->gpu_error.flags); i915_reset(engine->i915, ENGINE_MASK(engine->id), "live_workarounds"); return 0; } @@ -394,7 +393,6 @@ static int live_gpu_reset_gt_engine_workarounds(void *arg) { struct drm_i915_private *i915 = arg; - struct i915_gpu_error *error = &i915->gpu_error; intel_wakeref_t wakeref; struct wa_lists lists; bool ok; @@ -413,7 +411,6 @@ live_gpu_reset_gt_engine_workarounds(void *arg) if (!ok) goto out; - set_bit(I915_RESET_HANDOFF, &error->flags); i915_reset(i915, ALL_ENGINES, "live_workarounds"); ok = verify_gt_engine_wa(i915, &lists, "after reset"); diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 
5477ad4a7e7d..8ab5a2688a0c 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -58,8 +58,8 @@ static void mock_device_release(struct drm_device *dev) i915_gem_contexts_lost(i915); mutex_unlock(&i915->drm.struct_mutex); - cancel_delayed_work_sync(&i915->gt.retire_work); - cancel_delayed_work_sync(&i915->gt.idle_work); + drain_delayed_work(&i915->gt.retire_work); + drain_delayed_work(&i915->gt.idle_work); i915_gem_drain_workqueue(i915); mutex_lock(&i915->drm.struct_mutex); From f3dccbdbdd94a3b255f9661df6048c4a6e372db0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 25 Jan 2019 13:22:29 +0000 Subject: [PATCH 15/89] drm/i915/selftests: Trim struct_mutex duration for set-wedged selftest Trim the struct_mutex hold and exclude the call to i915_gem_set_wedged() as a reminder that it must be callable without struct_mutex held. Signed-off-by: Chris Wilson Cc: Michal Wajdeczko Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190125132230.22221-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 67431355cd6e..8025c7e0bf6c 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -389,16 +389,16 @@ static int igt_wedged_reset(void *arg) /* Check that we can recover a wedged device with a GPU reset */ igt_global_reset_lock(i915); - mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(i915); i915_gem_set_wedged(i915); - GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error)); + mutex_lock(&i915->drm.struct_mutex); + GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error)); i915_reset(i915, ALL_ENGINES, NULL); + mutex_unlock(&i915->drm.struct_mutex); intel_runtime_pm_put(i915, wakeref); - mutex_unlock(&i915->drm.struct_mutex); igt_global_reset_unlock(i915); return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0; @@ -1675,6 +1675,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) wakeref = intel_runtime_pm_get(i915); saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck); + drain_delayed_work(&i915->gpu_error.hangcheck_work); /* flush param */ err = i915_subtests(tests, i915); From 9b974bde4d4ad44c0458f922373340d54cb0452c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 25 Jan 2019 13:22:30 +0000 Subject: [PATCH 16/89] drm/i915: Issue engine resets onto idle engines Always perform the requested reset, even if we believe the engine is idle. Presumably there was a reason the caller wanted the reset, and in the near future we lose the easy tracking for whether the engine is idle. 
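(Illustration, not part of the patch: a tiny userspace model of the behavioural change. The names loosely mirror the driver, but everything here is a made-up sketch; only the control flow follows the diff below.)

#include <assert.h>

struct engine {
	int seqno;		/* last completed seqno */
	int last_submit;	/* last submitted seqno */
	unsigned long reset_count;
};

/* Old behaviour: quietly skip the reset when the engine looks idle. */
static void reset_engine_old(struct engine *e)
{
	if (e->seqno - e->last_submit >= 0)
		return;		/* idle: the requested reset is swallowed */
	e->reset_count++;
}

/* New behaviour: always honour the caller's request. */
static void reset_engine_new(struct engine *e)
{
	e->reset_count++;
}

int main(void)
{
	struct engine e = { .seqno = 10, .last_submit = 10 };

	reset_engine_old(&e);
	assert(e.reset_count == 0);	/* caller can't tell if anything happened */

	reset_engine_new(&e);
	assert(e.reset_count == 1);	/* count moves for every request */
	return 0;
}

This is why the selftest below can simply expect ++reset_engine_count after every call, instead of predicting whether the reset would be elided.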
Signed-off-by: Chris Wilson Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20190125132230.22221-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_reset.c | 4 ---- .../gpu/drm/i915/selftests/intel_hangcheck.c | 22 +++++-------------- 2 files changed, 6 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index 68af017ee548..99bd3bc336b3 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -1076,10 +1076,6 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) GEM_TRACE("%s flags=%lx\n", engine->name, error->flags); GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); - if (i915_seqno_passed(intel_engine_get_seqno(engine), - intel_engine_last_submit(engine))) - return 0; - reset_prepare_engine(engine); if (msg) diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 8025c7e0bf6c..2c38ea5892d9 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -449,8 +449,6 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { - u32 seqno = intel_engine_get_seqno(engine); - if (active) { struct i915_request *rq; @@ -479,8 +477,6 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) break; } - GEM_BUG_ON(!rq->global_seqno); - seqno = rq->global_seqno - 1; i915_request_put(rq); } @@ -496,11 +492,10 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) break; } - reset_engine_count += active; if (i915_reset_engine_count(&i915->gpu_error, engine) != - reset_engine_count) { - pr_err("%s engine reset %srecorded!\n", - engine->name, active ? "not " : ""); + ++reset_engine_count) { + pr_err("%s engine reset not recorded!\n", + engine->name); err = -EINVAL; break; } @@ -728,7 +723,6 @@ static int __igt_reset_engines(struct drm_i915_private *i915, set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { - u32 seqno = intel_engine_get_seqno(engine); struct i915_request *rq = NULL; if (flags & TEST_ACTIVE) { @@ -756,9 +750,6 @@ static int __igt_reset_engines(struct drm_i915_private *i915, err = -EIO; break; } - - GEM_BUG_ON(!rq->global_seqno); - seqno = rq->global_seqno - 1; } err = i915_reset_engine(engine, NULL); @@ -795,10 +786,9 @@ static int __igt_reset_engines(struct drm_i915_private *i915, reported = i915_reset_engine_count(&i915->gpu_error, engine); reported -= threads[engine->id].resets; - if (reported != (flags & TEST_ACTIVE ? count : 0)) { - pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu, expected %lu reported\n", - engine->name, test_name, count, reported, - (flags & TEST_ACTIVE ? count : 0)); + if (reported != count) { + pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n", + engine->name, test_name, count, reported); if (!err) err = -EINVAL; } From 32db0b6501d97b09e92e70caefc74fa35aa9a8d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 27 Nov 2018 22:05:50 +0200 Subject: [PATCH 17/89] drm/i915: Don't try to use the hardware frame counter with i965gm TV output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On i965gm the hardware frame counter does not work when the TV encoder is active. So let's not try to consult the hardware frame counter in that case. 
Instead we'll fall back to the timestamp based guesstimation method used on gen2. Note that the pipe timings generated by the TV encoder are also rather peculiar. Apparently the pipe wants to run at a much higher speed (related to the oversample clock somehow it seems) but during the vertical active period the TV encoder stalls the pipe every few lines to keep its speed in check. But once the vertical blanking period is reached the pipe gets to run at full speed. This means our vblank timestamp estimates are suspect. Fixing all that would require quite a bit more work. This simple fix at least avoids the nasty vblank timeouts that are happening currently. Curiously the frame counter works just fine on i945gm and gm45. I don't really understand what kind of mishap occurred with the hardware design on i965gm. Sadly I wasn't able to find any chicken bits etc. that would fix the frame counter :( v2: Move the zero vs. non-zero hw counter value handling into i915_get_vblank_counter() (Daniel) Use the per-crtc maximum exclusively, leaving the per-device maximum at zero v3: max_vblank_count not populated yet in intel_enable_pipe() use intel_crtc_max_vblank_count() instead Cc: stable@vger.kernel.org Cc: Daniel Vetter Fixes: 51e31d49c890 ("drm/i915: Use generic vblank wait") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93782 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190122125149.GE5527@ideak-desk.fi.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/i915_irq.c | 27 ++++++++++------ drivers/gpu/drm/i915/intel_display.c | 48 +++++++++++++++++++++++----- 2 files changed, 58 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7056ae2d1e0e..926e05f0db24 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -821,11 +821,26 @@ static void i915_enable_asle_pipestat(struct drm_i915_private *dev_priv) static u32 i915_get_vblank_counter(struct drm_device *dev, unsigned int pipe) { struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; + const struct drm_display_mode *mode = &vblank->hwmode; i915_reg_t high_frame, low_frame; u32 high1, high2, low, pixel, vbl_start, hsync_start, htotal; - const struct drm_display_mode *mode = &dev->vblank[pipe].hwmode; unsigned long irqflags; + /* + * On i965gm TV output the frame counter only works up to + * the point when we enable the TV encoder. After that the + * frame counter ceases to work and reads zero. We need a + * vblank wait before enabling the TV encoder and so we + * have to enable vblank interrupts while the frame counter + * is still in a working state. However the core vblank code + * does not like us returning non-zero frame counter values + * when we've told it that we don't have a working frame + * counter. Thus we must stop non-zero values leaking out. 
+ */ + if (!vblank->max_vblank_count) + return 0; + htotal = mode->crtc_htotal; hsync_start = mode->crtc_hsync_start; vbl_start = mode->crtc_vblank_start; @@ -4579,16 +4594,10 @@ void intel_irq_init(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) >= 8) rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; - if (IS_GEN(dev_priv, 2)) { - /* Gen2 doesn't have a hardware frame counter */ - dev->max_vblank_count = 0; - } else if (IS_G4X(dev_priv) || INTEL_GEN(dev_priv) >= 5) { - dev->max_vblank_count = 0xffffffff; /* full 32 bit counter */ + if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) dev->driver->get_vblank_counter = g4x_get_vblank_counter; - } else { + else if (INTEL_GEN(dev_priv) >= 3) dev->driver->get_vblank_counter = i915_get_vblank_counter; - dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */ - } /* * Opt out of the vblank disable timer on everything except gen2. diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 36c1126cbc85..67a64cbbe851 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1758,6 +1758,35 @@ enum pipe intel_crtc_pch_transcoder(struct intel_crtc *crtc) return crtc->pipe; } +static u32 intel_crtc_max_vblank_count(const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + + /* + * On i965gm the hardware frame counter reads + * zero when the TV encoder is enabled :( + */ + if (IS_I965GM(dev_priv) && + (crtc_state->output_types & BIT(INTEL_OUTPUT_TVOUT))) + return 0; + + if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) + return 0xffffffff; /* full 32 bit counter */ + else if (INTEL_GEN(dev_priv) >= 3) + return 0xffffff; /* only 24 bits of frame count */ + else + return 0; /* Gen2 doesn't have a hardware frame counter */ +} + +static void intel_crtc_vblank_on(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + + drm_crtc_set_max_vblank_count(&crtc->base, + intel_crtc_max_vblank_count(crtc_state)); + drm_crtc_vblank_on(&crtc->base); +} + static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state) { struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); @@ -1810,7 +1839,7 @@ static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state) * when it's derived from the timestamps. 
So let's wait for the * pipe to start properly before we call drm_crtc_vblank_on() */ - if (dev_priv->drm.max_vblank_count == 0) + if (intel_crtc_max_vblank_count(new_crtc_state) == 0) intel_wait_for_pipe_scanline_moving(crtc); } @@ -5678,7 +5707,7 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config, ironlake_pch_enable(old_intel_state, pipe_config); assert_vblank_disabled(crtc); - drm_crtc_vblank_on(crtc); + intel_crtc_vblank_on(pipe_config); intel_encoders_enable(crtc, pipe_config, old_state); @@ -5832,7 +5861,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_ddi_set_vc_payload_alloc(pipe_config, true); assert_vblank_disabled(crtc); - drm_crtc_vblank_on(crtc); + intel_crtc_vblank_on(pipe_config); intel_encoders_enable(crtc, pipe_config, old_state); @@ -6171,7 +6200,7 @@ static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, intel_enable_pipe(pipe_config); assert_vblank_disabled(crtc); - drm_crtc_vblank_on(crtc); + intel_crtc_vblank_on(pipe_config); intel_encoders_enable(crtc, pipe_config, old_state); } @@ -6230,7 +6259,7 @@ static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config, intel_enable_pipe(pipe_config); assert_vblank_disabled(crtc); - drm_crtc_vblank_on(crtc); + intel_crtc_vblank_on(pipe_config); intel_encoders_enable(crtc, pipe_config, old_state); } @@ -12778,8 +12807,9 @@ static int intel_atomic_prepare_commit(struct drm_device *dev, u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; + struct drm_vblank_crtc *vblank = &dev->vblank[drm_crtc_index(&crtc->base)]; - if (!dev->max_vblank_count) + if (!vblank->max_vblank_count) return (u32)drm_crtc_accurate_vblank_count(&crtc->base); return dev->driver->get_vblank_counter(dev, crtc->pipe); @@ -15894,10 +15924,12 @@ intel_modeset_setup_hw_state(struct drm_device *dev, * waits, so we need vblank interrupts restored beforehand. */ for_each_intel_crtc(&dev_priv->drm, crtc) { + crtc_state = to_intel_crtc_state(crtc->base.state); + drm_crtc_vblank_reset(&crtc->base); - if (crtc->base.state->active) - drm_crtc_vblank_on(&crtc->base); + if (crtc_state->base.active) + intel_crtc_vblank_on(crtc_state); } intel_sanitize_plane_mapping(dev_priv); From 6801603d3d7dd05c62d368045c7d5a90980596a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 12 Nov 2018 18:59:47 +0200 Subject: [PATCH 18/89] drm/i915/tv: Fix interlaced ysize calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the calculation of the vertical active period for interlaced TV modes. 
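(Illustration, not part of the patch: the off-by-one worked through in a standalone sketch. nbr_end = 239 is an illustrative NTSC-style value, i.e. 240 active lines per field.)

#include <stdio.h>

/* nbr_end is the last active line of a field, counted from 0 */
static int ysize_old(int nbr_end) { return 2 * nbr_end + 1; }
static int ysize_new(int nbr_end) { return 2 * (nbr_end + 1); }

int main(void)
{
	int nbr_end = 239;	/* illustrative: 240 lines per field */

	printf("old: %d\n", ysize_old(nbr_end));	/* 479, one line short */
	printf("new: %d\n", ysize_new(nbr_end));	/* 480, two full fields */
	return 0;
}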
Signed-off-by: Ville Syrjälä
Link: https://patchwork.freedesktop.org/patch/msgid/20181112170000.27531-4-ville.syrjala@linux.intel.com
Reviewed-by: Imre Deak
---
 drivers/gpu/drm/i915/intel_tv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index bd5536f0ec92..91cb31ff8ff0 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -1086,7 +1086,7 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder,
 	if (tv_mode->progressive)
 		ysize = tv_mode->nbr_end + 1;
 	else
-		ysize = 2*tv_mode->nbr_end + 1;
+		ysize = 2 * (tv_mode->nbr_end + 1);
 	xpos += conn_state->tv.margins.left;
 	ypos += conn_state->tv.margins.top;

From d515282380df8c85ca5ed1cc9699c52ccf00fa18 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?=
Date: Mon, 12 Nov 2018 18:59:48 +0200
Subject: [PATCH 19/89] drm/i915/tv: Fix tv mode clocks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The oversample clock is always supposed to be either 108 MHz or
148.5 MHz. Make it so.

Signed-off-by: Ville Syrjälä
Link: https://patchwork.freedesktop.org/patch/msgid/20181112170000.27531-5-ville.syrjala@linux.intel.com
Reviewed-by: Imre Deak
---
 drivers/gpu/drm/i915/intel_tv.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index 91cb31ff8ff0..a053bb3460e6 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -635,7 +635,7 @@ static const struct tv_mode tv_modes[] = {
 	},
 	{
 		.name		= "480p",
-		.clock		= 107520,
+		.clock		= 108000,
 		.refresh	= 59940,
 		.oversample	= TV_OVERSAMPLE_4X,
 		.component_only = 1,
@@ -659,7 +659,7 @@
 	},
 	{
 		.name		= "576p",
-		.clock		= 107520,
+		.clock		= 108000,
 		.refresh	= 50000,
 		.oversample	= TV_OVERSAMPLE_4X,
 		.component_only = 1,
@@ -683,7 +683,7 @@
 	},
 	{
 		.name		= "720p@60Hz",
-		.clock		= 148800,
+		.clock		= 148500,
 		.refresh	= 60000,
 		.oversample	= TV_OVERSAMPLE_2X,
 		.component_only = 1,
@@ -707,7 +707,7 @@
 	},
 	{
 		.name		= "720p@50Hz",
-		.clock		= 148800,
+		.clock		= 148500,
 		.refresh	= 50000,
 		.oversample	= TV_OVERSAMPLE_2X,
 		.component_only = 1,
@@ -732,7 +732,7 @@
 	},
 	{
 		.name		= "1080i@50Hz",
-		.clock		= 148800,
+		.clock		= 148500,
 		.refresh	= 50000,
 		.oversample	= TV_OVERSAMPLE_2X,
 		.component_only = 1,
@@ -758,7 +758,7 @@
 	},
 	{
 		.name		= "1080i@60Hz",
-		.clock		= 148800,
+		.clock		= 148500,
 		.refresh	= 60000,
 		.oversample	= TV_OVERSAMPLE_2X,
 		.component_only = 1,
@@ -1113,7 +1113,7 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder,
 static const struct drm_display_mode reported_modes[] = {
 	{
 		.name = "NTSC 480i",
-		.clock = 107520,
+		.clock = 108000,
 		.hdisplay = 1280,
 		.hsync_start = 1368,
 		.hsync_end = 1496,

From 4f50379837434182922f06b0c6b2a28d498c6480 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?=
Date: Mon, 12 Nov 2018 18:59:49 +0200
Subject: [PATCH 20/89] drm/i915/tv: Store the TV oversampling factor in the
 TV mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Store the oversampling factor as a number in the TV modes. We shall
want to do arithmetic with this, which is easier if it's a number we
can use directly.
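(Illustration, not part of the patch: the kind of arithmetic a plain factor allows — dividing the oversample clock down to a dot clock. The clock values are taken from the table above; the struct and program are a standalone sketch, not driver code.)

#include <stdio.h>

struct tvm {
	const char *name;
	unsigned int clock;		/* oversample clock in kHz */
	unsigned char oversample;	/* plain number: 8, 4, 2, ... */
};

int main(void)
{
	const struct tvm modes[] = {
		{ "NTSC-M",	108000, 8 },
		{ "480p",	108000, 4 },
		{ "720p@60Hz",	148500, 2 },
	};
	unsigned int i;

	for (i = 0; i < sizeof(modes) / sizeof(modes[0]); i++)
		printf("%s: dot clock %u kHz\n", modes[i].name,
		       modes[i].clock / modes[i].oversample);
	return 0;
}

Note how 108000 / 8 = 13500 kHz falls out directly, the familiar 13.5 MHz SD sample rate; with the old TV_OVERSAMPLE_* register encodings no such division was possible without first decoding the bits.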
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181112170000.27531-6-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_tv.c | 42 ++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index a053bb3460e6..3e3eee77296a 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -306,7 +306,7 @@ struct tv_mode { u32 clock; u16 refresh; /* in millihertz (for precision) */ - u32 oversample; + u8 oversample; u8 hsync_end; u16 hblank_start, hblank_end, htotal; bool progressive : 1, trilevel_sync : 1, component_only : 1; @@ -378,7 +378,7 @@ static const struct tv_mode tv_modes[] = { .name = "NTSC-M", .clock = 108000, .refresh = 59940, - .oversample = TV_OVERSAMPLE_8X, + .oversample = 8, .component_only = 0, /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */ @@ -421,7 +421,7 @@ static const struct tv_mode tv_modes[] = { .name = "NTSC-443", .clock = 108000, .refresh = 59940, - .oversample = TV_OVERSAMPLE_8X, + .oversample = 8, .component_only = 0, /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 4.43MHz */ .hsync_end = 64, .hblank_end = 124, @@ -463,7 +463,7 @@ static const struct tv_mode tv_modes[] = { .name = "NTSC-J", .clock = 108000, .refresh = 59940, - .oversample = TV_OVERSAMPLE_8X, + .oversample = 8, .component_only = 0, /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */ @@ -506,7 +506,7 @@ static const struct tv_mode tv_modes[] = { .name = "PAL-M", .clock = 108000, .refresh = 59940, - .oversample = TV_OVERSAMPLE_8X, + .oversample = 8, .component_only = 0, /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */ @@ -550,7 +550,7 @@ static const struct tv_mode tv_modes[] = { .name = "PAL-N", .clock = 108000, .refresh = 50000, - .oversample = TV_OVERSAMPLE_8X, + .oversample = 8, .component_only = 0, .hsync_end = 64, .hblank_end = 128, @@ -595,7 +595,7 @@ static const struct tv_mode tv_modes[] = { .name = "PAL", .clock = 108000, .refresh = 50000, - .oversample = TV_OVERSAMPLE_8X, + .oversample = 8, .component_only = 0, .hsync_end = 64, .hblank_end = 142, @@ -637,7 +637,7 @@ static const struct tv_mode tv_modes[] = { .name = "480p", .clock = 108000, .refresh = 59940, - .oversample = TV_OVERSAMPLE_4X, + .oversample = 4, .component_only = 1, .hsync_end = 64, .hblank_end = 122, @@ -661,7 +661,7 @@ static const struct tv_mode tv_modes[] = { .name = "576p", .clock = 108000, .refresh = 50000, - .oversample = TV_OVERSAMPLE_4X, + .oversample = 4, .component_only = 1, .hsync_end = 64, .hblank_end = 139, @@ -685,7 +685,7 @@ static const struct tv_mode tv_modes[] = { .name = "720p@60Hz", .clock = 148500, .refresh = 60000, - .oversample = TV_OVERSAMPLE_2X, + .oversample = 2, .component_only = 1, .hsync_end = 80, .hblank_end = 300, @@ -709,7 +709,7 @@ static const struct tv_mode tv_modes[] = { .name = "720p@50Hz", .clock = 148500, .refresh = 50000, - .oversample = TV_OVERSAMPLE_2X, + .oversample = 2, .component_only = 1, .hsync_end = 80, .hblank_end = 300, @@ -734,7 +734,7 @@ static const struct tv_mode tv_modes[] = { .name = "1080i@50Hz", .clock = 148500, .refresh = 50000, - .oversample = TV_OVERSAMPLE_2X, + .oversample = 2, .component_only = 1, .hsync_end = 88, .hblank_end = 235, @@ -760,7 +760,7 @@ static const struct tv_mode tv_modes[] = { .name = "1080i@60Hz", .clock = 148500, .refresh = 60000, - .oversample = TV_OVERSAMPLE_2X, + .oversample = 2, .component_only = 
1, .hsync_end = 88, .hblank_end = 235, @@ -1029,7 +1029,21 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, } tv_ctl |= TV_ENC_PIPE_SEL(intel_crtc->pipe); - tv_ctl |= tv_mode->oversample; + + switch (tv_mode->oversample) { + case 8: + tv_ctl |= TV_OVERSAMPLE_8X; + break; + case 4: + tv_ctl |= TV_OVERSAMPLE_4X; + break; + case 2: + tv_ctl |= TV_OVERSAMPLE_2X; + break; + default: + tv_ctl |= TV_OVERSAMPLE_NONE; + break; + } if (tv_mode->progressive) tv_ctl |= TV_PROGRESSIVE; From 56f6230811818a9dc659ffbfb9f954b2bed25819 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 12 Nov 2018 18:59:50 +0200 Subject: [PATCH 21/89] drm/i915/tv: Use bools where appropriate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'component_only' is a bool. Initialize it like a bool. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181112170000.27531-7-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_tv.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 3e3eee77296a..0536077cc955 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -379,7 +379,7 @@ static const struct tv_mode tv_modes[] = { .clock = 108000, .refresh = 59940, .oversample = 8, - .component_only = 0, + .component_only = false, /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */ .hsync_end = 64, .hblank_end = 124, @@ -422,7 +422,7 @@ static const struct tv_mode tv_modes[] = { .clock = 108000, .refresh = 59940, .oversample = 8, - .component_only = 0, + .component_only = false, /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 4.43MHz */ .hsync_end = 64, .hblank_end = 124, .hblank_start = 836, .htotal = 857, @@ -464,7 +464,7 @@ static const struct tv_mode tv_modes[] = { .clock = 108000, .refresh = 59940, .oversample = 8, - .component_only = 0, + .component_only = false, /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */ .hsync_end = 64, .hblank_end = 124, @@ -507,7 +507,7 @@ static const struct tv_mode tv_modes[] = { .clock = 108000, .refresh = 59940, .oversample = 8, - .component_only = 0, + .component_only = false, /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */ .hsync_end = 64, .hblank_end = 124, @@ -551,7 +551,7 @@ static const struct tv_mode tv_modes[] = { .clock = 108000, .refresh = 50000, .oversample = 8, - .component_only = 0, + .component_only = false, .hsync_end = 64, .hblank_end = 128, .hblank_start = 844, .htotal = 863, @@ -596,7 +596,7 @@ static const struct tv_mode tv_modes[] = { .clock = 108000, .refresh = 50000, .oversample = 8, - .component_only = 0, + .component_only = false, .hsync_end = 64, .hblank_end = 142, .hblank_start = 844, .htotal = 863, @@ -638,7 +638,7 @@ static const struct tv_mode tv_modes[] = { .clock = 108000, .refresh = 59940, .oversample = 4, - .component_only = 1, + .component_only = true, .hsync_end = 64, .hblank_end = 122, .hblank_start = 842, .htotal = 857, @@ -662,7 +662,7 @@ static const struct tv_mode tv_modes[] = { .clock = 108000, .refresh = 50000, .oversample = 4, - .component_only = 1, + .component_only = true, .hsync_end = 64, .hblank_end = 139, .hblank_start = 859, .htotal = 863, @@ -686,7 +686,7 @@ static const struct tv_mode tv_modes[] = { .clock = 148500, .refresh = 60000, .oversample = 2, - .component_only = 1, + .component_only = true, .hsync_end = 80, 
.hblank_end = 300,
 .hblank_start = 1580, .htotal = 1649,
@@ -710,7 +710,7 @@
 		.clock		= 148500,
 		.refresh	= 50000,
 		.oversample	= 2,
-		.component_only = 1,
+		.component_only = true,
 		.hsync_end	= 80, .hblank_end	= 300,
 		.hblank_start	= 1580, .htotal		= 1979,
@@ -735,7 +735,7 @@
 		.clock		= 148500,
 		.refresh	= 50000,
 		.oversample	= 2,
-		.component_only = 1,
+		.component_only = true,
 		.hsync_end	= 88, .hblank_end	= 235,
 		.hblank_start	= 2155, .htotal		= 2639,
@@ -761,7 +761,7 @@
 		.clock		= 148500,
 		.refresh	= 60000,
 		.oversample	= 2,
-		.component_only = 1,
+		.component_only = true,
 		.hsync_end	= 88, .hblank_end	= 235,
 		.hblank_start	= 2155, .htotal		= 2199,

From bda5f53206e56c69ac8b7c8d60e08ee97fd4618f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?=
Date: Mon, 12 Nov 2018 18:59:51 +0200
Subject: [PATCH 22/89] drm/i915/tv: Nuke silly 0 initialization of xpos/ypos
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Just assign the margin values directly to xpos/ypos instead of first
initializing to zero and then adding the values.

Signed-off-by: Ville Syrjälä
Link: https://patchwork.freedesktop.org/patch/msgid/20181112170000.27531-8-ville.syrjala@linux.intel.com
Reviewed-by: Imre Deak
---
 drivers/gpu/drm/i915/intel_tv.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index 0536077cc955..0910eadc605a 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -993,7 +993,7 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder,
 	const struct video_levels *video_levels;
 	const struct color_conversion *color_conversion;
 	bool burst_ena;
-	int xpos = 0x0, ypos = 0x0;
+	int xpos, ypos;
 	unsigned int xsize, ysize;

 	if (!tv_mode)
@@ -1102,8 +1102,8 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder,
 	else
 		ysize = 2 * (tv_mode->nbr_end + 1);

-	xpos += conn_state->tv.margins.left;
-	ypos += conn_state->tv.margins.top;
+	xpos = conn_state->tv.margins.left;
+	ypos = conn_state->tv.margins.top;
 	xsize -= (conn_state->tv.margins.left +
 		  conn_state->tv.margins.right);
 	ysize -= (conn_state->tv.margins.top +

From 65ddf7f968b8d3c51b0fcefc43339c69f3bdd546 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?=
Date: Mon, 12 Nov 2018 18:59:53 +0200
Subject: [PATCH 23/89] drm/i915/tv: Deobfuscate preferred mode selection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rewrite the preferred mode selection to just check whether the TV
mode is HD or SD. For SD TV modes we favor 480 line modes, for 720p
we prefer 720 line modes, and for 1080i/p we prefer 1080 line modes.
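(Illustration, not part of the patch: the new rule reduced to a standalone predicate. It mirrors the logic of intel_tv_is_preferred_mode() in the diff below; the test values are illustrative.)

#include <assert.h>
#include <stdbool.h>

/* tv_vdisplay: active lines of the TV format (480/576 SD, 720, 1080) */
static bool is_preferred(int mode_vdisplay, int tv_vdisplay)
{
	/* prefer 480 line user modes for every SD TV format */
	if (tv_vdisplay <= 576)
		tv_vdisplay = 480;

	return mode_vdisplay == tv_vdisplay;
}

int main(void)
{
	assert(is_preferred(480, 576));		/* PAL still prefers 480 line modes */
	assert(is_preferred(720, 720));
	assert(!is_preferred(720, 1080));
	assert(is_preferred(1080, 1080));
	return 0;
}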
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181112170000.27531-10-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_tv.c | 52 ++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 0910eadc605a..e5eef87117e9 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -859,6 +859,14 @@ intel_tv_mode_valid(struct drm_connector *connector, return MODE_CLOCK_RANGE; } +static int +intel_tv_mode_vdisplay(const struct tv_mode *tv_mode) +{ + if (tv_mode->progressive) + return tv_mode->nbr_end + 1; + else + return 2 * (tv_mode->nbr_end + 1); +} static void intel_tv_get_config(struct intel_encoder *encoder, @@ -1097,10 +1105,7 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, /* Filter ctl must be set before TV_WIN_SIZE */ I915_WRITE(TV_FILTER_CTL_1, TV_AUTO_SCALE); xsize = tv_mode->hblank_start - tv_mode->hblank_end; - if (tv_mode->progressive) - ysize = tv_mode->nbr_end + 1; - else - ysize = 2 * (tv_mode->nbr_end + 1); + ysize = intel_tv_mode_vdisplay(tv_mode); xpos = conn_state->tv.margins.left; ypos = conn_state->tv.margins.top; @@ -1319,22 +1324,28 @@ static const struct input_res { {"1920x1080", 1920, 1080}, }; -/* - * Chose preferred mode according to line number of TV format - */ -static void -intel_tv_choose_preferred_modes(const struct tv_mode *tv_mode, - struct drm_display_mode *mode_ptr) +/* Choose preferred mode according to line number of TV format */ +static bool +intel_tv_is_preferred_mode(const struct drm_display_mode *mode, + const struct tv_mode *tv_mode) { - if (tv_mode->nbr_end < 480 && mode_ptr->vdisplay == 480) - mode_ptr->type |= DRM_MODE_TYPE_PREFERRED; - else if (tv_mode->nbr_end > 480) { - if (tv_mode->progressive == true && tv_mode->nbr_end < 720) { - if (mode_ptr->vdisplay == 720) - mode_ptr->type |= DRM_MODE_TYPE_PREFERRED; - } else if (mode_ptr->vdisplay == 1080) - mode_ptr->type |= DRM_MODE_TYPE_PREFERRED; - } + int vdisplay = intel_tv_mode_vdisplay(tv_mode); + + /* prefer 480 line modes for all SD TV modes */ + if (vdisplay <= 576) + vdisplay = 480; + + return vdisplay == mode->vdisplay; +} + +static void +intel_tv_set_mode_type(struct drm_display_mode *mode, + const struct tv_mode *tv_mode) +{ + mode->type = DRM_MODE_TYPE_DRIVER; + + if (intel_tv_is_preferred_mode(mode, tv_mode)) + mode->type |= DRM_MODE_TYPE_PREFERRED; } static int @@ -1382,8 +1393,7 @@ intel_tv_get_modes(struct drm_connector *connector) tmp = div_u64(tmp, 1000000); mode_ptr->clock = (int) tmp; - mode_ptr->type = DRM_MODE_TYPE_DRIVER; - intel_tv_choose_preferred_modes(tv_mode, mode_ptr); + intel_tv_set_mode_type(mode_ptr, tv_mode); drm_mode_probed_add(connector, mode_ptr); count++; } From 5023520fd37293f48eeb08429ac5a344a9c19ea4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 12 Nov 2018 18:59:54 +0200 Subject: [PATCH 24/89] drm/i915/tv: Use drm_mode_set_name() to name TV modes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No point in storing the mode names in the array. drm_mode_set_name() will give us the same names without wasting space for these string constants. 
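(Illustration, not part of the patch: roughly what the DRM core helper does when deriving the name from the timings. This is a standalone sketch, not the real drm_mode_set_name(); it is only meant to show why the string table is redundant.)

#include <stdbool.h>
#include <stdio.h>

#define DRM_DISPLAY_MODE_LEN 32

struct mode {
	char name[DRM_DISPLAY_MODE_LEN];
	int hdisplay, vdisplay;
	bool interlaced;
};

/* roughly what drm_mode_set_name() produces */
static void mode_set_name(struct mode *m)
{
	snprintf(m->name, sizeof(m->name), "%dx%d%s",
		 m->hdisplay, m->vdisplay, m->interlaced ? "i" : "");
}

int main(void)
{
	struct mode m = { .hdisplay = 1280, .vdisplay = 720 };

	mode_set_name(&m);
	printf("%s\n", m.name);	/* "1280x720", same as the old table entry */
	return 0;
}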
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181112170000.27531-11-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_tv.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index e5eef87117e9..89e60a8f0fd9 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1312,16 +1312,15 @@ intel_tv_detect(struct drm_connector *connector, } static const struct input_res { - const char *name; - int w, h; + u16 w, h; } input_res_table[] = { - {"640x480", 640, 480}, - {"800x600", 800, 600}, - {"1024x768", 1024, 768}, - {"1280x1024", 1280, 1024}, - {"848x480", 848, 480}, - {"1280x720", 1280, 720}, - {"1920x1080", 1920, 1080}, + { 640, 480 }, + { 800, 600 }, + { 1024, 768 }, + { 1280, 1024 }, + { 848, 480 }, + { 1280, 720 }, + { 1920, 1080 }, }; /* Choose preferred mode according to line number of TV format */ @@ -1372,7 +1371,6 @@ intel_tv_get_modes(struct drm_connector *connector) mode_ptr = drm_mode_create(connector->dev); if (!mode_ptr) continue; - strlcpy(mode_ptr->name, input->name, DRM_DISPLAY_MODE_LEN); mode_ptr->hdisplay = hactive_s; mode_ptr->hsync_start = hactive_s + 1; @@ -1394,6 +1392,9 @@ intel_tv_get_modes(struct drm_connector *connector) mode_ptr->clock = (int) tmp; intel_tv_set_mode_type(mode_ptr, tv_mode); + + drm_mode_set_name(mode_ptr); + drm_mode_probed_add(connector, mode_ptr); count++; } From e94390aadaf2bf0fb87c9b7b0c0dc9910f08356a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 12 Nov 2018 18:59:55 +0200 Subject: [PATCH 25/89] drm/i915/tv: Make TV mode autoselection actually useable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current code insists on picking a new TV mode when switching between component and non-component cables. That's super annoying. Let's just keep the current TV mode unless the new cable type actually disagrees with it. 
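(Illustration, not part of the patch: the new policy as a standalone sketch. The mode table is an illustrative subset; the decision logic mirrors the rewritten intel_tv_find_better_format() in the diff below.)

#include <stdbool.h>
#include <stdio.h>

struct tvm { const char *name; bool component_only; };

static const struct tvm modes[] = {
	{ "NTSC-M",	false },
	{ "PAL",	false },
	{ "480p",	true },
	{ "720p@60Hz",	true },
};

/* keep the current mode unless the cable genuinely can't carry it */
static const struct tvm *pick(const struct tvm *cur, bool component_cable)
{
	unsigned int i;

	if (component_cable)		/* component supports everything */
		return cur;

	if (!cur->component_only)	/* current mode works on this cable */
		return cur;

	for (i = 0; i < sizeof(modes) / sizeof(modes[0]); i++)
		if (!modes[i].component_only)
			return &modes[i];

	return cur;
}

int main(void)
{
	printf("%s\n", pick(&modes[3], true)->name);	/* 720p@60Hz is kept */
	printf("%s\n", pick(&modes[3], false)->name);	/* falls back to NTSC-M */
	printf("%s\n", pick(&modes[1], false)->name);	/* PAL is kept */
	return 0;
}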
Signed-off-by: Ville Syrjälä
Link: https://patchwork.freedesktop.org/patch/msgid/20181112170000.27531-12-ville.syrjala@linux.intel.com
Reviewed-by: Imre Deak
---
 drivers/gpu/drm/i915/intel_tv.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index 89e60a8f0fd9..6279d6fed7e9 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -1252,16 +1252,18 @@ static void intel_tv_find_better_format(struct drm_connector *connector)
 	const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state);
 	int i;

-	if ((intel_tv->type == DRM_MODE_CONNECTOR_Component) ==
-	    tv_mode->component_only)
+	/* Component supports everything so we can keep the current mode */
+	if (intel_tv->type == DRM_MODE_CONNECTOR_Component)
 		return;

+	/* If the current mode is fine don't change it */
+	if (!tv_mode->component_only)
+		return;

 	for (i = 0; i < ARRAY_SIZE(tv_modes); i++) {
-		tv_mode = tv_modes + i;
+		tv_mode = &tv_modes[i];

-		if ((intel_tv->type == DRM_MODE_CONNECTOR_Component) ==
-		    tv_mode->component_only)
+		if (!tv_mode->component_only)
 			break;
 	}

From 528132a341fca6181dde75a4587e3703d04568c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?=
Date: Mon, 12 Nov 2018 18:59:56 +0200
Subject: [PATCH 26/89] drm/i915/tv: Nuke reported_modes[]
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove the silly reported_modes[] array. I suppose once upon a time
this actually had something to do with modes we reported to
userspace. Now it is just the placeholder for the mode we use for
load detection. We don't need it even for that, and instead we can
just rely on the fallback mode in intel_get_load_detect_pipe().
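(Illustration, not part of the patch: the calling convention this relies on, as a tiny standalone sketch. The names and the 640x480 value are illustrative only; the real helper is intel_get_load_detect_pipe(), which substitutes its own built-in mode when the caller passes NULL.)

#include <stdio.h>

struct mode { int hdisplay, vdisplay; };

static const struct mode load_detect_fallback = { 640, 480 };

/* NULL asks the helper to use its built-in fallback mode */
static const struct mode *load_detect_mode(const struct mode *requested)
{
	return requested ? requested : &load_detect_fallback;
}

int main(void)
{
	const struct mode *m = load_detect_mode(NULL);

	printf("%dx%d\n", m->hdisplay, m->vdisplay);
	return 0;
}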
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181112170000.27531-13-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_tv.c | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 6279d6fed7e9..2cacb8165fb6 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1129,23 +1129,6 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, I915_WRITE(TV_CTL, tv_ctl); } -static const struct drm_display_mode reported_modes[] = { - { - .name = "NTSC 480i", - .clock = 108000, - .hdisplay = 1280, - .hsync_start = 1368, - .hsync_end = 1496, - .htotal = 1712, - - .vdisplay = 1024, - .vsync_start = 1027, - .vsync_end = 1034, - .vtotal = 1104, - .type = DRM_MODE_TYPE_DRIVER, - }, -}; - static int intel_tv_detect_type(struct intel_tv *intel_tv, struct drm_connector *connector) @@ -1275,7 +1258,6 @@ intel_tv_detect(struct drm_connector *connector, struct drm_modeset_acquire_ctx *ctx, bool force) { - struct drm_display_mode mode; struct intel_tv *intel_tv = intel_attached_tv(connector); enum drm_connector_status status; int type; @@ -1284,13 +1266,11 @@ intel_tv_detect(struct drm_connector *connector, connector->base.id, connector->name, force); - mode = reported_modes[0]; - if (force) { struct intel_load_detect_pipe tmp; int ret; - ret = intel_get_load_detect_pipe(connector, &mode, &tmp, ctx); + ret = intel_get_load_detect_pipe(connector, NULL, &tmp, ctx); if (ret < 0) return ret; From a0ff6779c75fe86a66b8c2eb439a40a12139eb02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 12 Nov 2018 18:59:57 +0200 Subject: [PATCH 27/89] drm/i915/tv: Add 1080p30/50/60 TV modes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the missing 1080p TV modes. On gen4 all of them work just fine, whereas on gen3 only the 30Hz mode actually works correctly. 
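(Illustration, not part of the patch: how the property table ends up truncated on gen3, as a standalone sketch. The mode table is an illustrative subset; the real loop is in intel_tv_init() in the diff below and relies on the 1x-oversample modes sitting at the end of tv_modes[].)

#include <stdio.h>

struct tvm { const char *name; int oversample; };

static const struct tvm modes[] = {
	{ "1080p@30Hz", 2 },
	{ "1080p@50Hz", 1 },	/* 1x oversample: gen4 only */
	{ "1080p@60Hz", 1 },
};

int main(void)
{
	int gen = 3;		/* illustrative: a gen3 part */
	unsigned int i;

	/* stop at the first 1x-oversample mode on gen3 */
	for (i = 0; i < sizeof(modes) / sizeof(modes[0]); i++) {
		if (gen == 3 && modes[i].oversample == 1)
			break;
		printf("exposing %s\n", modes[i].name);
	}
	printf("%u modes exposed\n", i);
	return 0;
}

Because the loop breaks rather than skips, the unsupported modes must be the last entries in the table, which the new 1080p50/1080p60 entries honour.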
v2: s/IS_GEN3(dev_priv)/IS_GEN(dev_priv, 3)/

Signed-off-by: Ville Syrjälä
Link: https://patchwork.freedesktop.org/patch/msgid/20181112170000.27531-14-ville.syrjala@linux.intel.com
Reviewed-by: Imre Deak
---
 drivers/gpu/drm/i915/intel_tv.c | 90 +++++++++++++++++++++++++++++++--
 1 file changed, 86 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index 2cacb8165fb6..421f71c222e5 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -782,6 +782,84 @@ static const struct tv_mode tv_modes[] = {

 		.filter_table = filter_table,
 	},
+
+	{
+		.name		= "1080p@30Hz",
+		.clock		= 148500,
+		.refresh	= 30000,
+		.oversample	= 2,
+		.component_only = true,
+
+		.hsync_end	= 88, .hblank_end	= 235,
+		.hblank_start	= 2155, .htotal		= 2199,
+
+		.progressive	= true, .trilevel_sync = true,
+
+		.vsync_start_f1 = 8, .vsync_start_f2	= 8,
+		.vsync_len	= 10,
+
+		.veq_ena	= false, .veq_start_f1	= 0,
+		.veq_start_f2	= 0, .veq_len		= 0,
+
+		.vi_end_f1	= 44, .vi_end_f2	= 44,
+		.nbr_end	= 1079,
+
+		.burst_ena	= false,
+
+		.filter_table = filter_table,
+	},
+
+	{
+		.name		= "1080p@50Hz",
+		.clock		= 148500,
+		.refresh	= 50000,
+		.oversample	= 1,
+		.component_only = true,
+
+		.hsync_end	= 88, .hblank_end	= 235,
+		.hblank_start	= 2155, .htotal		= 2639,
+
+		.progressive	= true, .trilevel_sync = true,
+
+		.vsync_start_f1 = 8, .vsync_start_f2	= 8,
+		.vsync_len	= 10,
+
+		.veq_ena	= false, .veq_start_f1	= 0,
+		.veq_start_f2	= 0, .veq_len		= 0,
+
+		.vi_end_f1	= 44, .vi_end_f2	= 44,
+		.nbr_end	= 1079,
+
+		.burst_ena	= false,
+
+		.filter_table = filter_table,
+	},
+
+	{
+		.name		= "1080p@60Hz",
+		.clock		= 148500,
+		.refresh	= 60000,
+		.oversample	= 1,
+		.component_only = true,
+
+		.hsync_end	= 88, .hblank_end	= 235,
+		.hblank_start	= 2155, .htotal		= 2199,
+
+		.progressive	= true, .trilevel_sync = true,
+
+		.vsync_start_f1 = 8, .vsync_start_f2	= 8,
+		.vsync_len	= 10,
+
+		.veq_ena	= false, .veq_start_f1	= 0,
+		.veq_start_f2	= 0, .veq_len		= 0,
+
+		.vi_end_f1	= 44, .vi_end_f2	= 44,
+		.nbr_end	= 1079,
+
+		.burst_ena	= false,
+
+		.filter_table = filter_table,
+	},
 };

 static struct intel_tv *enc_to_tv(struct intel_encoder *encoder)
@@ -1537,11 +1615,15 @@ intel_tv_init(struct drm_i915_private *dev_priv)
 	connector->doublescan_allowed = false;

 	/* Create TV properties then attach current values */
-	for (i = 0; i < ARRAY_SIZE(tv_modes); i++)
+	for (i = 0; i < ARRAY_SIZE(tv_modes); i++) {
+		/* 1080p50/1080p60 not supported on gen3 */
+		if (IS_GEN(dev_priv, 3) &&
+		    tv_modes[i].oversample == 1)
+			break;
+
 		tv_format_names[i] = tv_modes[i].name;
-	drm_mode_create_tv_properties(dev,
-				      ARRAY_SIZE(tv_modes),
-				      tv_format_names);
+	}
+	drm_mode_create_tv_properties(dev, i, tv_format_names);

 	drm_object_attach_property(&connector->base,
 				   dev->mode_config.tv_mode_property,
 				   state->tv.mode);

From e3bb355c7d8b2e537673066ee223a554457ff50d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?=
Date: Mon, 12 Nov 2018 18:59:58 +0200
Subject: [PATCH 28/89] drm/i915/tv: Generate better pipe timings for TV
 encoder
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To make vblank timestamps work better with the TV encoder let's scale
the pipe timings such that the relationship between the TV active and
TV blanking periods is mirrored in the corresponding pipe timings.

Note that in reality the pipe runs at a faster speed during the TV
vblank, and correspondingly there are periods when the pipe is
entirely stopped.
We pretend that this isn't the case and as such we incur some error in the vblank timestamps during the TV vblank. Further explanation of the issues in a big comment in the code. This makes the vblank timestamps good enough to make i965gm (which doesn't have a working frame counter with the TV encoder) report correct frame numbers. Previously you could get all kinds of nonsense which resulted in eg. glxgears reporting that it's running at twice the actual framerate in most cases. v2: s/IS_GEN4(dev_priv)/IS_GEN(dev_priv, 4)/ in the comment for consistency Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181112170000.27531-15-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_tv.c | 322 +++++++++++++++++++++++++++----- 2 files changed, 278 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f4e447437d75..1eca166d95bb 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4895,6 +4895,7 @@ enum { # define TV_OVERSAMPLE_NONE (2 << 18) /* Selects 8x oversampling */ # define TV_OVERSAMPLE_8X (3 << 18) +# define TV_OVERSAMPLE_MASK (3 << 18) /* Selects progressive mode rather than interlaced */ # define TV_PROGRESSIVE (1 << 17) /* Sets the colorburst to PAL mode. Required for non-M PAL modes. */ diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 421f71c222e5..6897977ac117 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -339,7 +339,6 @@ struct tv_mode { const struct video_levels *composite_levels, *svideo_levels; const struct color_conversion *composite_color, *svideo_color; const u32 *filter_table; - u16 max_srcw; }; @@ -728,7 +727,6 @@ static const struct tv_mode tv_modes[] = { .burst_ena = false, .filter_table = filter_table, - .max_srcw = 800 }, { .name = "1080i@50Hz", @@ -946,13 +944,183 @@ intel_tv_mode_vdisplay(const struct tv_mode *tv_mode) return 2 * (tv_mode->nbr_end + 1); } +static void +intel_tv_mode_to_mode(struct drm_display_mode *mode, + const struct tv_mode *tv_mode) +{ + mode->clock = tv_mode->clock / + (tv_mode->oversample >> !tv_mode->progressive); + + /* + * tv_mode horizontal timings: + * + * hsync_end + * | hblank_end + * | | hblank_start + * | | | htotal + * | _______ | + * ____/ \___ + * \__/ \ + */ + mode->hdisplay = + tv_mode->hblank_start - tv_mode->hblank_end; + mode->hsync_start = mode->hdisplay + + tv_mode->htotal - tv_mode->hblank_start; + mode->hsync_end = mode->hsync_start + + tv_mode->hsync_end; + mode->htotal = tv_mode->htotal + 1; + + /* + * tv_mode vertical timings: + * + * vsync_start + * | vsync_end + * | | vi_end nbr_end + * | | | | + * | | _______ + * \__ ____/ \ + * \__/ + */ + mode->vdisplay = intel_tv_mode_vdisplay(tv_mode); + if (tv_mode->progressive) { + mode->vsync_start = mode->vdisplay + + tv_mode->vsync_start_f1 + 1; + mode->vsync_end = mode->vsync_start + + tv_mode->vsync_len; + mode->vtotal = mode->vdisplay + + tv_mode->vi_end_f1 + 1; + } else { + mode->vsync_start = mode->vdisplay + + tv_mode->vsync_start_f1 + 1 + + tv_mode->vsync_start_f2 + 1; + mode->vsync_end = mode->vsync_start + + 2 * tv_mode->vsync_len; + mode->vtotal = mode->vdisplay + + tv_mode->vi_end_f1 + 1 + + tv_mode->vi_end_f2 + 1; + } + + /* TV has it's own notion of sync and other mode flags, so clear them. 
*/ + mode->flags = 0; + + mode->vrefresh = 0; + mode->vrefresh = drm_mode_vrefresh(mode); + + snprintf(mode->name, sizeof(mode->name), + "%dx%d%c (%s)", + mode->hdisplay, mode->vdisplay, + tv_mode->progressive ? 'p' : 'i', + tv_mode->name); +} + +static void intel_tv_scale_mode_horiz(struct drm_display_mode *mode, + int hdisplay, int left_margin, + int right_margin) +{ + int hsync_start = mode->hsync_start - mode->hdisplay + right_margin; + int hsync_end = mode->hsync_end - mode->hdisplay + right_margin; + int new_htotal = mode->htotal * hdisplay / + (mode->hdisplay - left_margin - right_margin); + + mode->clock = mode->clock * new_htotal / mode->htotal; + + mode->hdisplay = hdisplay; + mode->hsync_start = hdisplay + hsync_start * new_htotal / mode->htotal; + mode->hsync_end = hdisplay + hsync_end * new_htotal / mode->htotal; + mode->htotal = new_htotal; +} + +static void intel_tv_scale_mode_vert(struct drm_display_mode *mode, + int vdisplay, int top_margin, + int bottom_margin) +{ + int vsync_start = mode->vsync_start - mode->vdisplay + bottom_margin; + int vsync_end = mode->vsync_end - mode->vdisplay + bottom_margin; + int new_vtotal = mode->vtotal * vdisplay / + (mode->vdisplay - top_margin - bottom_margin); + + mode->clock = mode->clock * new_vtotal / mode->vtotal; + + mode->vdisplay = vdisplay; + mode->vsync_start = vdisplay + vsync_start * new_vtotal / mode->vtotal; + mode->vsync_end = vdisplay + vsync_end * new_vtotal / mode->vtotal; + mode->vtotal = new_vtotal; +} + static void intel_tv_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct drm_display_mode *adjusted_mode = + &pipe_config->base.adjusted_mode; + struct drm_display_mode mode = {}; + u32 tv_ctl, hctl1, hctl3, vctl1, vctl2, tmp; + struct tv_mode tv_mode = {}; + int hdisplay = adjusted_mode->crtc_hdisplay; + int vdisplay = adjusted_mode->crtc_vdisplay; + int xsize, ysize, xpos, ypos; + pipe_config->output_types |= BIT(INTEL_OUTPUT_TVOUT); - pipe_config->base.adjusted_mode.crtc_clock = pipe_config->port_clock; + tv_ctl = I915_READ(TV_CTL); + hctl1 = I915_READ(TV_H_CTL_1); + hctl3 = I915_READ(TV_H_CTL_3); + vctl1 = I915_READ(TV_V_CTL_1); + vctl2 = I915_READ(TV_V_CTL_2); + + tv_mode.htotal = (hctl1 & TV_HTOTAL_MASK) >> TV_HTOTAL_SHIFT; + tv_mode.hsync_end = (hctl1 & TV_HSYNC_END_MASK) >> TV_HSYNC_END_SHIFT; + + tv_mode.hblank_start = (hctl3 & TV_HBLANK_START_MASK) >> TV_HBLANK_START_SHIFT; + tv_mode.hblank_end = (hctl3 & TV_HSYNC_END_MASK) >> TV_HBLANK_END_SHIFT; + + tv_mode.nbr_end = (vctl1 & TV_NBR_END_MASK) >> TV_NBR_END_SHIFT; + tv_mode.vi_end_f1 = (vctl1 & TV_VI_END_F1_MASK) >> TV_VI_END_F1_SHIFT; + tv_mode.vi_end_f2 = (vctl1 & TV_VI_END_F2_MASK) >> TV_VI_END_F2_SHIFT; + + tv_mode.vsync_len = (vctl2 & TV_VSYNC_LEN_MASK) >> TV_VSYNC_LEN_SHIFT; + tv_mode.vsync_start_f1 = (vctl2 & TV_VSYNC_START_F1_MASK) >> TV_VSYNC_START_F1_SHIFT; + tv_mode.vsync_start_f2 = (vctl2 & TV_VSYNC_START_F2_MASK) >> TV_VSYNC_START_F2_SHIFT; + + tv_mode.clock = pipe_config->port_clock; + + tv_mode.progressive = tv_ctl & TV_PROGRESSIVE; + + switch (tv_ctl & TV_OVERSAMPLE_MASK) { + case TV_OVERSAMPLE_8X: + tv_mode.oversample = 8; + break; + case TV_OVERSAMPLE_4X: + tv_mode.oversample = 4; + break; + case TV_OVERSAMPLE_2X: + tv_mode.oversample = 2; + break; + default: + tv_mode.oversample = 1; + break; + } + + tmp = I915_READ(TV_WIN_POS); + xpos = tmp >> 16; + ypos = tmp & 0xffff; + + tmp = I915_READ(TV_WIN_SIZE); + xsize = tmp >> 16; + ysize = 
tmp & 0xffff; + + intel_tv_mode_to_mode(&mode, &tv_mode); + + DRM_DEBUG_KMS("TV mode:\n"); + drm_mode_debug_printmodeline(&mode); + + intel_tv_scale_mode_horiz(&mode, hdisplay, + xpos, mode.hdisplay - xsize - xpos); + intel_tv_scale_mode_vert(&mode, vdisplay, + ypos, mode.vdisplay - ysize - ypos); + + adjusted_mode->crtc_clock = mode.clock; } static int @@ -963,6 +1131,8 @@ intel_tv_compute_config(struct intel_encoder *encoder, const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state); struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + int hdisplay = adjusted_mode->crtc_hdisplay; + int vdisplay = adjusted_mode->crtc_vdisplay; if (!tv_mode) return -EINVAL; @@ -971,17 +1141,90 @@ intel_tv_compute_config(struct intel_encoder *encoder, return -EINVAL; pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; - adjusted_mode->crtc_clock = tv_mode->clock; + DRM_DEBUG_KMS("forcing bpc to 8 for TV\n"); pipe_config->pipe_bpp = 8*3; - /* TV has it's own notion of sync and other mode flags, so clear them. */ - adjusted_mode->flags = 0; + pipe_config->port_clock = tv_mode->clock; + + intel_tv_mode_to_mode(adjusted_mode, tv_mode); + + DRM_DEBUG_KMS("TV mode:\n"); + drm_mode_debug_printmodeline(adjusted_mode); /* - * FIXME: We don't check whether the input mode is actually what we want - * or whether userspace is doing something stupid. + * The pipe scanline counter behaviour looks as follows when + * using the TV encoder: + * + * time -> + * + * dsl=vtotal-1 | | + * || || + * ___| | ___| | + * / | / | + * / | / | + * dsl=0 ___/ |_____/ | + * | | | | | | + * ^ ^ ^ ^ ^ + * | | | | pipe vblank/first part of tv vblank + * | | | bottom margin + * | | active + * | top margin + * remainder of tv vblank + * + * When the TV encoder is used the pipe wants to run faster + * than expected rate. During the active portion the TV + * encoder stalls the pipe every few lines to keep it in + * check. When the TV encoder reaches the bottom margin the + * pipe simply stops. Once we reach the TV vblank the pipe is + * no longer stalled and it runs at the max rate (apparently + * oversample clock on gen3, cdclk on gen4). Once the pipe + * reaches the pipe vtotal the pipe stops for the remainder + * of the TV vblank/top margin. The pipe starts up again when + * the TV encoder exits the top margin. + * + * To avoid huge hassles for vblank timestamping we scale + * the pipe timings as if the pipe always runs at the average + * rate it maintains during the active period. This also + * gives us a reasonable guesstimate as to the pixel rate. + * Due to the variation in the actual pipe speed the scanline + * counter will give us slightly erroneous results during the + * TV vblank/margins. But since vtotal was selected such that + * it matches the average rate of the pipe during the active + * portion the error shouldn't cause any serious grief to + * vblank timestamps. + * + * For posterity here is the empirically derived formula + * that gives us the maximum length of the pipe vblank + * we can use without causing display corruption. Following + * this would allow us to have a ticking scanline counter + * everywhere except during the bottom margin (there the + * pipe always stops). Ie. this would eliminate the second + * flat portion of the above graph. However this would also + * complicate vblank timestamping as the pipe vtotal would + * no longer match the average rate the pipe runs at during + * the active portion. Hence following this formula seems + * more trouble that it's worth. 
+ * + * if (IS_GEN(dev_priv, 4)) { + * num = cdclk * (tv_mode->oversample >> !tv_mode->progressive); + * den = tv_mode->clock; + * } else { + * num = tv_mode->oversample >> !tv_mode->progressive; + * den = 1; + * } + * max_pipe_vblank_len ~= + * (num * tv_htotal * (tv_vblank_len + top_margin)) / + * (den * pipe_htotal); */ + intel_tv_scale_mode_horiz(adjusted_mode, hdisplay, + conn_state->tv.margins.left, + conn_state->tv.margins.right); + intel_tv_scale_mode_vert(adjusted_mode, vdisplay, + conn_state->tv.margins.top, + conn_state->tv.margins.bottom); + drm_mode_set_crtcinfo(adjusted_mode, 0); + adjusted_mode->name[0] = '\0'; return 0; } @@ -1410,52 +1653,41 @@ intel_tv_set_mode_type(struct drm_display_mode *mode, static int intel_tv_get_modes(struct drm_connector *connector) { - struct drm_display_mode *mode_ptr; const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); - int j, count = 0; - u64 tmp; + int i, count = 0; - for (j = 0; j < ARRAY_SIZE(input_res_table); - j++) { - const struct input_res *input = &input_res_table[j]; - unsigned int hactive_s = input->w; - unsigned int vactive_s = input->h; + for (i = 0; i < ARRAY_SIZE(input_res_table); i++) { + const struct input_res *input = &input_res_table[i]; + struct drm_display_mode *mode; - if (tv_mode->max_srcw && input->w > tv_mode->max_srcw) + if (input->w > 1024 && + !tv_mode->progressive && + !tv_mode->component_only) continue; - if (input->w > 1024 && (!tv_mode->progressive - && !tv_mode->component_only)) + mode = drm_mode_create(connector->dev); + if (!mode) continue; - mode_ptr = drm_mode_create(connector->dev); - if (!mode_ptr) - continue; + /* + * We take the TV mode and scale it to look + * like it had the expected h/vdisplay. This + * provides the most information to userspace + * about the actual timings of the mode. We + * do ignore the margins though. + */ + intel_tv_mode_to_mode(mode, tv_mode); + if (count == 0) { + DRM_DEBUG_KMS("TV mode:\n"); + drm_mode_debug_printmodeline(mode); + } + intel_tv_scale_mode_horiz(mode, input->w, 0, 0); + intel_tv_scale_mode_vert(mode, input->h, 0, 0); + intel_tv_set_mode_type(mode, tv_mode); - mode_ptr->hdisplay = hactive_s; - mode_ptr->hsync_start = hactive_s + 1; - mode_ptr->hsync_end = hactive_s + 64; - if (mode_ptr->hsync_end <= mode_ptr->hsync_start) - mode_ptr->hsync_end = mode_ptr->hsync_start + 1; - mode_ptr->htotal = hactive_s + 96; + drm_mode_set_name(mode); - mode_ptr->vdisplay = vactive_s; - mode_ptr->vsync_start = vactive_s + 1; - mode_ptr->vsync_end = vactive_s + 32; - if (mode_ptr->vsync_end <= mode_ptr->vsync_start) - mode_ptr->vsync_end = mode_ptr->vsync_start + 1; - mode_ptr->vtotal = vactive_s + 33; - - tmp = mul_u32_u32(tv_mode->refresh, mode_ptr->vtotal); - tmp *= mode_ptr->htotal; - tmp = div_u64(tmp, 1000000); - mode_ptr->clock = (int) tmp; - - intel_tv_set_mode_type(mode_ptr, tv_mode); - - drm_mode_set_name(mode_ptr); - - drm_mode_probed_add(connector, mode_ptr); + drm_mode_probed_add(connector, mode); count++; } From 690157f0a9e7e4e903111794fae42bd2ff1abb93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 12 Nov 2018 18:59:59 +0200 Subject: [PATCH 29/89] drm/i915/tv: Fix >1024 modes on gen3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On gen3 we must disable the TV encoder vertical filter for >1024 pixel wide sources. Once that's done all we can do is try to center the image on the screen.
Naturally the TV mode vertical resolution must be equal to or larger than the user mode vertical resolution or else we'd have to cut off part of the user mode. And while we may not be able to respect the user's choice of top and bottom borders exactly (or we'd have to reject the mode, most likely), we can try to maintain the relative sizes of the top and bottom border with respect to each other. Additionally we must configure the pipe as interlaced if the TV mode is interlaced. v2: Make intel_tv_connector_duplicate_state() static and drop the badly copy-pasted kerneldoc s/IS_GEN3(dev_priv/IS_GEN(dev_priv, 3)/ Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181112170000.27531-16-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_tv.c | 91 ++++++++++++++++++++++++++++++--- 1 file changed, 83 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 6897977ac117..cb6829bc762f 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -860,6 +860,35 @@ static const struct tv_mode tv_modes[] = { }, }; +struct intel_tv_connector_state { + struct drm_connector_state base; + + /* + * May need to override the user margins for + * gen3 >1024 wide source vertical centering. + */ + struct { + u16 top, bottom; + } margins; + + bool bypass_vfilter; +}; + +#define to_intel_tv_connector_state(x) container_of(x, struct intel_tv_connector_state, base) + +static struct drm_connector_state * +intel_tv_connector_duplicate_state(struct drm_connector *connector) +{ + struct intel_tv_connector_state *state; + + state = kmemdup(connector->state, sizeof(*state), GFP_KERNEL); + if (!state) + return NULL; + + __drm_atomic_helper_connector_duplicate_state(connector, &state->base); + return &state->base; +} + static struct intel_tv *enc_to_tv(struct intel_encoder *encoder) { return container_of(encoder, struct intel_tv, base); @@ -1128,6 +1157,9 @@ intel_tv_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_tv_connector_state *tv_conn_state = + to_intel_tv_connector_state(conn_state); const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state); struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; @@ -1148,6 +1180,43 @@ intel_tv_compute_config(struct intel_encoder *encoder, pipe_config->port_clock = tv_mode->clock; intel_tv_mode_to_mode(adjusted_mode, tv_mode); + drm_mode_set_crtcinfo(adjusted_mode, 0); + + if (IS_GEN(dev_priv, 3) && hdisplay > 1024) { + int extra, top, bottom; + + extra = adjusted_mode->crtc_vdisplay - vdisplay; + + if (extra < 0) { + DRM_DEBUG_KMS("No vertical scaling for >1024 pixel wide modes\n"); + return false; + } + + /* Need to turn off the vertical filter and center the image */ + + /* Attempt to maintain the relative sizes of the margins */ + top = conn_state->tv.margins.top; + bottom = conn_state->tv.margins.bottom; + + if (top + bottom) + top = extra * top / (top + bottom); + else + top = extra / 2; + bottom = extra - top; + + tv_conn_state->margins.top = top; + tv_conn_state->margins.bottom = bottom; + + tv_conn_state->bypass_vfilter = true; + + if (!tv_mode->progressive) + adjusted_mode->flags |= DRM_MODE_FLAG_INTERLACE; + } else { + tv_conn_state->margins.top = conn_state->tv.margins.top; + tv_conn_state->margins.bottom = conn_state->tv.margins.bottom; +
tv_conn_state->bypass_vfilter = false; + } DRM_DEBUG_KMS("TV mode:\n"); drm_mode_debug_printmodeline(adjusted_mode); @@ -1221,8 +1290,8 @@ intel_tv_compute_config(struct intel_encoder *encoder, conn_state->tv.margins.left, conn_state->tv.margins.right); intel_tv_scale_mode_vert(adjusted_mode, vdisplay, - conn_state->tv.margins.top, - conn_state->tv.margins.bottom); + tv_conn_state->margins.top, + tv_conn_state->margins.bottom); drm_mode_set_crtcinfo(adjusted_mode, 0); adjusted_mode->name[0] = '\0'; @@ -1315,8 +1384,10 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc); struct intel_tv *intel_tv = enc_to_tv(encoder); + const struct intel_tv_connector_state *tv_conn_state = + to_intel_tv_connector_state(conn_state); const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state); - u32 tv_ctl; + u32 tv_ctl, tv_filter_ctl; u32 scctl1, scctl2, scctl3; int i, j; const struct video_levels *video_levels; @@ -1424,16 +1495,20 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, assert_pipe_disabled(dev_priv, intel_crtc->pipe); /* Filter ctl must be set before TV_WIN_SIZE */ - I915_WRITE(TV_FILTER_CTL_1, TV_AUTO_SCALE); + tv_filter_ctl = TV_AUTO_SCALE; + if (tv_conn_state->bypass_vfilter) + tv_filter_ctl |= TV_V_FILTER_BYPASS; + I915_WRITE(TV_FILTER_CTL_1, tv_filter_ctl); + xsize = tv_mode->hblank_start - tv_mode->hblank_end; ysize = intel_tv_mode_vdisplay(tv_mode); xpos = conn_state->tv.margins.left; - ypos = conn_state->tv.margins.top; + ypos = tv_conn_state->margins.top; xsize -= (conn_state->tv.margins.left + conn_state->tv.margins.right); - ysize -= (conn_state->tv.margins.top + - conn_state->tv.margins.bottom); + ysize -= (tv_conn_state->margins.top + + tv_conn_state->margins.bottom); I915_WRITE(TV_WIN_POS, (xpos<<16)|ypos); I915_WRITE(TV_WIN_SIZE, (xsize<<16)|ysize); @@ -1700,7 +1775,7 @@ static const struct drm_connector_funcs intel_tv_connector_funcs = { .destroy = intel_connector_destroy, .fill_modes = drm_helper_probe_single_connector_modes, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_duplicate_state = intel_tv_connector_duplicate_state, }; static int intel_tv_atomic_check(struct drm_connector *connector, From 0bb1ffe4eaa40e953f700886515a1edb27dfc24c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 12 Nov 2018 19:00:00 +0200 Subject: [PATCH 30/89] drm/i915/tv: Filter out >1024 wide modes that would need vertical scaling on gen3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since gen3 can't handle >1024 wide sources with vertical scaling let's not advertise such modes in the mode list. Less temptation for the user to try out things that won't work.
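As a sketch of what the new check (in the diff below) ends up filtering, with hypothetical source resolutions of the kind found in input_res_table:

    /* gen3: wide sources force the vertical filter off, so a wide
     * source that would also need vertical scaling cannot be offered */
    if (IS_GEN(dev_priv, 3) && input->w > 1024 &&
        input->h > intel_tv_mode_vdisplay(tv_mode))
            continue;

    /* e.g. 1280x720 against a  480-line TV mode -> dropped
     *      1280x720 against a 1080-line TV mode -> kept (no v-scaling needed)
     *      1024x768 against a  480-line TV mode -> kept (not a wide source) */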
v2: s/IS_GEN3(dev_priv/IS_GEN(dev_priv, 3)/ Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181112170000.27531-17-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_tv.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index cb6829bc762f..f0b9abda7720 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1728,6 +1728,7 @@ intel_tv_set_mode_type(struct drm_display_mode *mode, static int intel_tv_get_modes(struct drm_connector *connector) { + struct drm_i915_private *dev_priv = to_i915(connector->dev); const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); int i, count = 0; @@ -1740,6 +1741,11 @@ intel_tv_get_modes(struct drm_connector *connector) !tv_mode->component_only) continue; + /* no vertical scaling with wide sources on gen3 */ + if (IS_GEN(dev_priv, 3) && input->w > 1024 && + input->h > intel_tv_mode_vdisplay(tv_mode)) + continue; + mode = drm_mode_create(connector->dev); if (!mode) continue; From 790cc9941b137e00aeb9e9cd6f44d05565c3f271 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 11 Jan 2019 19:08:11 +0200 Subject: [PATCH 31/89] drm/i915: Clean up intel_plane_atomic_check_with_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename some of the state variables in intel_plane_atomic_check_with_state() to make it less confusing. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190111170823.4441-2-ville.syrjala@linux.intel.com Reviewed-by: Matt Roper Reviewed-by: Uma Shankar --- drivers/gpu/drm/i915/intel_atomic_plane.c | 36 +++++++++++------------ 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index 9a2fdc77ebcb..a1a263026574 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -110,41 +110,39 @@ intel_plane_destroy_state(struct drm_plane *plane, } int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state, - struct intel_crtc_state *crtc_state, + struct intel_crtc_state *new_crtc_state, const struct intel_plane_state *old_plane_state, - struct intel_plane_state *intel_state) + struct intel_plane_state *new_plane_state) { - struct drm_plane *plane = intel_state->base.plane; - struct drm_plane_state *state = &intel_state->base; - struct intel_plane *intel_plane = to_intel_plane(plane); + struct intel_plane *plane = to_intel_plane(new_plane_state->base.plane); int ret; - crtc_state->active_planes &= ~BIT(intel_plane->id); - crtc_state->nv12_planes &= ~BIT(intel_plane->id); - intel_state->base.visible = false; + new_crtc_state->active_planes &= ~BIT(plane->id); + new_crtc_state->nv12_planes &= ~BIT(plane->id); + new_plane_state->base.visible = false; - /* If this is a cursor plane, no further checks are needed. 
*/ - if (!intel_state->base.crtc && !old_plane_state->base.crtc) + if (!new_plane_state->base.crtc && !old_plane_state->base.crtc) return 0; - ret = intel_plane->check_plane(crtc_state, intel_state); + ret = plane->check_plane(new_crtc_state, new_plane_state); if (ret) return ret; /* FIXME pre-g4x don't work like this */ - if (state->visible) - crtc_state->active_planes |= BIT(intel_plane->id); + if (new_plane_state->base.visible) + new_crtc_state->active_planes |= BIT(plane->id); - if (state->visible && state->fb->format->format == DRM_FORMAT_NV12) - crtc_state->nv12_planes |= BIT(intel_plane->id); + if (new_plane_state->base.visible && + new_plane_state->base.fb->format->format == DRM_FORMAT_NV12) + new_crtc_state->nv12_planes |= BIT(plane->id); - if (state->visible || old_plane_state->base.visible) - crtc_state->update_planes |= BIT(intel_plane->id); + if (new_plane_state->base.visible || old_plane_state->base.visible) + new_crtc_state->update_planes |= BIT(plane->id); return intel_plane_atomic_calc_changes(old_crtc_state, - &crtc_state->base, + &new_crtc_state->base, old_plane_state, - state); + &new_plane_state->base); } static int intel_plane_atomic_check(struct drm_plane *plane, From 29214e8cf6ceaaf1a6ad74ee6a781f79bcb84d1f Mon Sep 17 00:00:00 2001 From: P Raviraj Sitaram Date: Wed, 19 Dec 2018 13:59:12 +0530 Subject: [PATCH 32/89] drm/i915: correct the pitch check for NV12 framebuffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An NV12 framebuffer requires the pitch, not the width, to be a multiple of 4. This patch corrects the check. For instance, a 480p video, whose width and pitch are 854 and 896 respectively, has so far been excluded from the NV12 plane. Changes since v1: - Removed check for NV12 buffer dimensions since additional checks are done for viewport size in intel_sprite.c Signed-off-by: Dongseong Hwang Signed-off-by: P Raviraj Sitaram Cc: Chandra Konduru Cc: Vidya Srinivas Cc: Ville Syrjälä Cc: Juha-Pekka Heikkila Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/1545208152-22658-1-git-send-email-raviraj.p.sitaram@intel.com --- drivers/gpu/drm/i915/intel_display.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 67a64cbbe851..aa777182cac9 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14710,14 +14710,6 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_helper_mode_fill_fb_struct(&dev_priv->drm, fb, mode_cmd); - if (fb->format->format == DRM_FORMAT_NV12 && - (fb->width < SKL_MIN_YUV_420_SRC_W || - fb->height < SKL_MIN_YUV_420_SRC_H || - (fb->width % 4) != 0 || (fb->height % 4) != 0)) { - DRM_DEBUG_KMS("src dimensions not correct for NV12\n"); - goto err; - } - for (i = 0; i < fb->format->num_planes; i++) { u32 stride_alignment; From c5627461490e4b913e8747d3b06541e5264a50d7 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 26 Jan 2019 00:11:23 -0700 Subject: [PATCH 33/89] drm/i915: Disable -Wuninitialized This warning is disabled by default in scripts/Makefile.extrawarn when W= is not provided but this Makefile adds -Wall after this warning is disabled so it shows up in the build when it shouldn't: In file included from drivers/gpu/drm/i915/intel_breadcrumbs.c:895: drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c:350:34: error: variable 'wq' is uninitialized when used within its own initialization [-Werror,-Wuninitialized]
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); ^~ ./include/linux/wait.h:74:63: note: expanded from macro 'DECLARE_WAIT_QUEUE_HEAD_ONSTACK' struct wait_queue_head name = __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) ~~~~ ^~~~ ./include/linux/wait.h:72:33: note: expanded from macro '__WAIT_QUEUE_HEAD_INIT_ONSTACK' ({ init_waitqueue_head(&name); name; }) ^~~~ 1 error generated. Explicitly disable the warning like commit 46e2068081e9 ("drm/i915: Disable some extra clang warnings"). Link: https://github.com/ClangBuiltLinux/linux/issues/220 Suggested-by: Chris Wilson Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190126071122.24557-1-natechancellor@gmail.com --- drivers/gpu/drm/i915/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 8300efe60fe1..210d0e8777b6 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -22,6 +22,7 @@ subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable) subdir-ccflags-y += $(call cc-disable-warning, sign-compare) subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized) subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides) +subdir-ccflags-y += $(call cc-disable-warning, uninitialized) subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror # Fine grained warnings disable From ad4062da1397e32a942191c61de3eabe1fb3664f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Jan 2019 01:02:18 +0000 Subject: [PATCH 34/89] drm/i915: Wait for a moment before forcibly resetting the device During igt, we ask to reset the device if any requests are still outstanding at the end of a test, as this quickly kills off any erroneous hanging request streams that may escape a test. However, since it may take the device a few milliseconds to flush itself after the end of a normal test, *cough* guc *cough*, we may accidentally tell the device to reset itself after it idles. If we wait a moment (our usual I915_IDLE_ENGINES_TIMEOUT of 200ms; seems a bit high, but still better than umpteen hangchecks!), we can differentiate better between a stuck engine and a healthy one, and so avoid prematurely forcing the reset and any extra complications that it may entail.
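Conceptually, the one-shot idleness check becomes a poll. A minimal sketch of the semantics relied on (the driver's real wait_for() helper is ktime based and more careful about the final recheck):

    int ret = -ETIMEDOUT;
    unsigned long end = jiffies + msecs_to_jiffies(I915_IDLE_ENGINES_TIMEOUT);

    do {
            if (intel_engines_are_idle(i915)) {
                    ret = 0;        /* healthy: the device flushed itself */
                    break;
            }
            usleep_range(10, 1000);
    } while (time_before(jiffies, end));

    if (ret)
            i915_gem_set_wedged(i915);      /* still busy after 200ms: force the reset */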
Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190128010245.20148-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 76dea0572f3e..ecf762e24d0e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4050,7 +4050,8 @@ i915_drop_caches_set(void *data, u64 val) val, val & DROP_ALL); wakeref = intel_runtime_pm_get(i915); - if (val & DROP_RESET_ACTIVE && !intel_engines_are_idle(i915)) + if (val & DROP_RESET_ACTIVE && + wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) i915_gem_set_wedged(i915); /* No need to check and wait for gpu resets, only libdrm auto-restarts From 6a2a94041052dd4616a56b6a9aa25dd2ad51c772 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 25 Jan 2019 20:19:30 +0200 Subject: [PATCH 35/89] drm/i915/tv: Fix return value for intel_tv_compute_config() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ever since commit 204474a6b859 ("drm/i915: Pass down rc in intel_encoder->compute_config()") we're supposed to return an errno from .compute_config(). I failed to notice that when pushing the TV encoder fixes which were written before said commit. Fix up the return value for the error case. Cc: Imre Deak Fixes: 690157f0a9e7 ("drm/i915/tv: Fix >1024 modes on gen3") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190125181931.19482-1-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_tv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index f0b9abda7720..78be08e2971b 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1189,7 +1189,7 @@ intel_tv_compute_config(struct intel_encoder *encoder, if (extra < 0) { DRM_DEBUG_KMS("No vertical scaling for >1024 pixel wide modes\n"); - return false; + return -EINVAL; } /* Need to turn off the vertical filter and center the image */ From 8a920e24f05802e2ed88d0454a7253449c4654c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 25 Jan 2019 20:19:31 +0200 Subject: [PATCH 36/89] drm/i915/tv: Use the scanline counter for timestamps on i965gm TV output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just like the frame counter, the pixel counter also reads zero all the time when the TV encoder is used. Fortunately the scanline counter still works sufficiently well so let's use that to correct the vblank timestamps. Otherwise the timestamps may end up out of whack, and since we use them to guesstimate the vblank counter value that may end up incorrect as well.
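To see why the timestamp quality matters here (illustrative reasoning, not code from the patch): without a working hardware frame counter the vblank count has to be guesstimated from the timestamps, roughly

    /* frames elapsed since the last recorded vblank */
    frames = DIV_ROUND_CLOSEST_ULL(t_now_ns - t_last_ns, framedur_ns);

so a timestamp error of more than about half a frame period makes the derived counter skip or repeat a frame.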
Cc: Imre Deak Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190125181931.19482-2-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/i915_irq.c | 7 +++++-- drivers/gpu/drm/i915/intel_drv.h | 4 +++- drivers/gpu/drm/i915/intel_tv.c | 10 ++++++++++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 926e05f0db24..26ab00862b59 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1012,6 +1012,9 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, int position; int vbl_start, vbl_end, hsync_start, htotal, vtotal; unsigned long irqflags; + bool use_scanline_counter = INTEL_GEN(dev_priv) >= 5 || + IS_G4X(dev_priv) || IS_GEN(dev_priv, 2) || + mode->private_flags & I915_MODE_FLAG_USE_SCANLINE_COUNTER; if (WARN_ON(!mode->crtc_clock)) { DRM_DEBUG_DRIVER("trying to get scanoutpos for disabled " @@ -1044,7 +1047,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, if (stime) *stime = ktime_get(); - if (IS_GEN(dev_priv, 2) || IS_G4X(dev_priv) || INTEL_GEN(dev_priv) >= 5) { + if (use_scanline_counter) { /* No obvious pixelcount register. Only query vertical * scanout position from Display scan line register. */ @@ -1104,7 +1107,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, else position += vtotal - vbl_end; - if (IS_GEN(dev_priv, 2) || IS_G4X(dev_priv) || INTEL_GEN(dev_priv) >= 5) { + if (use_scanline_counter) { *vpos = position; *hpos = 0; } else { diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 47195320413a..35fbc527c19e 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -629,9 +629,11 @@ struct intel_crtc_scaler_state { }; /* drm_mode->private_flags */ -#define I915_MODE_FLAG_INHERITED 1 +#define I915_MODE_FLAG_INHERITED (1<<0) /* Flag to get scanline using frame time stamps */ #define I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP (1<<1) +/* Flag to use the scanline counter instead of the pixel counter */ +#define I915_MODE_FLAG_USE_SCANLINE_COUNTER (1<<2) struct intel_pipe_wm { struct intel_wm_level wm[5]; diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 78be08e2971b..751b88dde18e 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1150,6 +1150,11 @@ intel_tv_get_config(struct intel_encoder *encoder, ypos, mode.vdisplay - ysize - ypos); adjusted_mode->crtc_clock = mode.clock; + + /* pixel counter doesn't work on i965gm TV output */ + if (IS_I965GM(dev_priv)) + adjusted_mode->private_flags |= + I915_MODE_FLAG_USE_SCANLINE_COUNTER; } static int @@ -1295,6 +1300,11 @@ intel_tv_compute_config(struct intel_encoder *encoder, drm_mode_set_crtcinfo(adjusted_mode, 0); adjusted_mode->name[0] = '\0'; + /* pixel counter doesn't work on i965gm TV output */ + if (IS_I965GM(dev_priv)) + adjusted_mode->private_flags |= + I915_MODE_FLAG_USE_SCANLINE_COUNTER; + return 0; } From 7bed8adcd9f86231bb69bbc02f88ad89330f99e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 11 Jan 2019 19:49:50 +0200 Subject: [PATCH 37/89] drm/i915: Try to sanitize bogus DPLL state left over by broken SNB BIOSen MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Certain SNB machines (eg. 
ASUS K53SV) seem to have a broken BIOS which misprograms the hardware badly when encountering a suitably high resolution display. The programmed pipe timings are somewhat bonkers and the DPLL is totally misprogrammed (P divider == 0). That will result in atomic commit timeouts as apparently the pipe is sufficiently stuck to not signal vblank interrupts. IIRC something like this was also observed on some other SNB machine years ago (might have been a Dell XPS 8300) but a BIOS update cured it. Sadly it looks like this was never fixed for the ASUS K53SV as the latest BIOS (K53SV.320 11/11/2011) is still broken. The quickest way to deal with this seems to be to shut down the pipe+ports+DPLL. Unfortunately doing this during the normal sanitization phase isn't quite soon enough as we already spew several WARNs about the bogus hardware state. But it's better than hanging the boot for a few dozen seconds. Since this is limited to a few old machines it doesn't seem entirely worthwhile to try and rework the readout+sanitization code to handle it more gracefully. v2: Fix potential NULL deref (kbuild test robot) Constify has_bogus_dpll_config() Cc: stable@vger.kernel.org # v4.20+ Cc: Daniel Kamil Kozar Reported-by: Daniel Kamil Kozar Tested-by: Daniel Kamil Kozar Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109245 Fixes: 516a49cc1946 ("drm/i915: Fix assert_plane() warning on bootup with external display") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190111174950.10681-1-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/intel_display.c | 50 ++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index aa777182cac9..6cf480a0eb23 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15515,16 +15515,45 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, } } +static bool has_bogus_dpll_config(const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + + /* + * Some SNB BIOSen (eg. ASUS K53SV) are known to misprogram + * the hardware when a high res displays plugged in. DPLL P + * divider is zero, and the pipe timings are bonkers. We'll + * try to disable everything in that case. + * + * FIXME would be nice to be able to sanitize this state + * without several WARNs, but for now let's take the easy + * road. + */ + return IS_GEN(dev_priv, 6) && + crtc_state->base.active && + crtc_state->shared_dpll && + crtc_state->port_clock == 0; +} + static void intel_sanitize_encoder(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_connector *connector; + struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + struct intel_crtc_state *crtc_state = crtc ? + to_intel_crtc_state(crtc->base.state) : NULL; /* We need to check both for a crtc link (meaning that the * encoder is active and trying to read from a pipe) and the * pipe itself being active. */ - bool has_active_crtc = encoder->base.crtc && - to_intel_crtc(encoder->base.crtc)->active; + bool has_active_crtc = crtc_state && + crtc_state->base.active; + + if (crtc_state && has_bogus_dpll_config(crtc_state)) { + DRM_DEBUG_KMS("BIOS has misprogrammed the hardware.
Disabling pipe %c\n", + pipe_name(crtc->pipe)); + has_active_crtc = false; + } connector = intel_encoder_find_connector(encoder); if (connector && !has_active_crtc) { @@ -15535,16 +15564,25 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) /* Connector is active, but has no active pipe. This is * fallout from our resume register restoring. Disable * the encoder manually again. */ - if (encoder->base.crtc) { - struct drm_crtc_state *crtc_state = encoder->base.crtc->state; + if (crtc_state) { + struct drm_encoder *best_encoder; DRM_DEBUG_KMS("[ENCODER:%d:%s] manually disabled\n", encoder->base.base.id, encoder->base.name); + + /* avoid oopsing in case the hooks consult best_encoder */ + best_encoder = connector->base.state->best_encoder; + connector->base.state->best_encoder = &encoder->base; + if (encoder->disable) - encoder->disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state); + encoder->disable(encoder, crtc_state, + connector->base.state); if (encoder->post_disable) - encoder->post_disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state); + encoder->post_disable(encoder, crtc_state, + connector->base.state); + + connector->base.state->best_encoder = best_encoder; } encoder->base.crtc = NULL; From 499197dc169601116e106cabe409bf39295893b3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Jan 2019 10:23:52 +0000 Subject: [PATCH 38/89] drm/i915: Stop tracking MRU activity on VMA Our goal is to remove struct_mutex and replace it with fine grained locking. One of the thorny issues is our eviction logic for reclaiming space for an execbuffer (or GTT mmaping, among a few other examples). While eviction itself is easy to move under a per-VM mutex, performing the activity tracking is less agreeable. One solution is not to do any MRU tracking and do a simple coarse evaluation during eviction of active/inactive, with a loose temporal ordering of last insertion/evaluation. That keeps all the locking constrained to when we are manipulating the VM itself, neatly avoiding the tricky handling of possible recursive locking during execbuf and elsewhere. Note that discarding the MRU (currently implemented as a pair of lists, to avoid scanning the active list for a NONBLOCKING search) is unlikely to impact upon our efficiency to reclaim VM space (where we think a LRU model is best) as our current strategy is to use random idle replacement first before doing a search, and over time the use of softpinned 48b per-ppGTT is growing (thereby eliminating any need to perform any eviction searches, in theory at least) with the remaining users being found on much older devices (gen2-gen6). v2: Changelog and commentary rewritten to elaborate on the duality of a single list being both an inactive and active list. v3: Consolidate bool parameters into a single set of flags; don't comment on the duality of a single variable being a multiplicity of bits. 
Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190128102356.15037-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 10 +-- drivers/gpu/drm/i915/i915_gem_evict.c | 87 +++++++++++-------- drivers/gpu/drm/i915/i915_gem_gtt.c | 15 ++-- drivers/gpu/drm/i915/i915_gem_gtt.h | 26 +----- drivers/gpu/drm/i915/i915_gem_shrinker.c | 8 +- drivers/gpu/drm/i915/i915_gem_stolen.c | 3 +- drivers/gpu/drm/i915/i915_gpu_error.c | 42 ++++----- drivers/gpu/drm/i915/i915_vma.c | 9 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 2 +- 10 files changed, 95 insertions(+), 111 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d20b42386c3c..f45186ddb236 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -253,10 +253,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, pinned = ggtt->vm.reserved; mutex_lock(&dev->struct_mutex); - list_for_each_entry(vma, &ggtt->vm.active_list, vm_link) - if (i915_vma_is_pinned(vma)) - pinned += vma->node.size; - list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link) + list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) if (i915_vma_is_pinned(vma)) pinned += vma->node.size; mutex_unlock(&dev->struct_mutex); @@ -1539,13 +1536,10 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); for_each_ggtt_vma(vma, obj) { - if (i915_vma_is_active(vma)) - continue; - if (!drm_mm_node_allocated(&vma->node)) continue; - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + list_move_tail(&vma->vm_link, &vma->vm->bound_list); } i915 = to_i915(obj->base.dev); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index f6855401f247..d76839670632 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -126,31 +126,25 @@ i915_gem_evict_something(struct i915_address_space *vm, struct drm_i915_private *dev_priv = vm->i915; struct drm_mm_scan scan; struct list_head eviction_list; - struct list_head *phases[] = { - &vm->inactive_list, - &vm->active_list, - NULL, - }, **phase; struct i915_vma *vma, *next; struct drm_mm_node *node; enum drm_mm_insert_mode mode; + struct i915_vma *active; int ret; lockdep_assert_held(&vm->i915->drm.struct_mutex); trace_i915_gem_evict(vm, min_size, alignment, flags); /* - * The goal is to evict objects and amalgamate space in LRU order. - * The oldest idle objects reside on the inactive list, which is in - * retirement order. The next objects to retire are those in flight, - * on the active list, again in retirement order. + * The goal is to evict objects and amalgamate space in rough LRU order. + * Since both active and inactive objects reside on the same list, + * in a mix of creation and last scanned order, as we process the list + * we sort it into inactive/active, which keeps the active portion + * in a rough MRU order. * * The retirement sequence is thus: - * 1. Inactive objects (already retired) - * 2. Active objects (will stall on unbinding) - * - * On each list, the oldest objects lie at the HEAD with the freshest - * object on the TAIL. + * 1. Inactive objects (already retired, random order) + * 2. 
Active objects (will stall on unbinding, oldest scanned first) */ mode = DRM_MM_INSERT_BEST; if (flags & PIN_HIGH) @@ -169,17 +163,46 @@ i915_gem_evict_something(struct i915_address_space *vm, */ if (!(flags & PIN_NONBLOCK)) i915_retire_requests(dev_priv); - else - phases[1] = NULL; search_again: + active = NULL; INIT_LIST_HEAD(&eviction_list); - phase = phases; - do { - list_for_each_entry(vma, *phase, vm_link) - if (mark_free(&scan, vma, flags, &eviction_list)) - goto found; - } while (*++phase); + list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) { + /* + * We keep this list in a rough least-recently scanned order + * of active elements (inactive elements are cheap to reap). + * New entries are added to the end, and we move anything we + * scan to the end. The assumption is that the working set + * of applications is either steady state (and thanks to the + * userspace bo cache it almost always is) or volatile and + * frequently replaced after a frame, which are self-evicting! + * Given that assumption, the MRU order of the scan list is + * fairly static, and keeping it in least-recently scan order + * is suitable. + * + * To notice when we complete one full cycle, we record the + * first active element seen, before moving it to the tail. + */ + if (i915_vma_is_active(vma)) { + if (vma == active) { + if (flags & PIN_NONBLOCK) + break; + + active = ERR_PTR(-EAGAIN); + } + + if (active != ERR_PTR(-EAGAIN)) { + if (!active) + active = vma; + + list_move_tail(&vma->vm_link, &vm->bound_list); + continue; + } + } + + if (mark_free(&scan, vma, flags, &eviction_list)) + goto found; + } /* Nothing found, clean up and bail out! */ list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { @@ -388,11 +411,6 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, */ int i915_gem_evict_vm(struct i915_address_space *vm) { - struct list_head *phases[] = { - &vm->inactive_list, - &vm->active_list, - NULL - }, **phase; struct list_head eviction_list; struct i915_vma *vma, *next; int ret; @@ -412,16 +430,13 @@ int i915_gem_evict_vm(struct i915_address_space *vm) } INIT_LIST_HEAD(&eviction_list); - phase = phases; - do { - list_for_each_entry(vma, *phase, vm_link) { - if (i915_vma_is_pinned(vma)) - continue; + list_for_each_entry(vma, &vm->bound_list, vm_link) { + if (i915_vma_is_pinned(vma)) + continue; - __i915_vma_pin(vma); - list_add(&vma->evict_link, &eviction_list); - } - } while (*++phase); + __i915_vma_pin(vma); + list_add(&vma->evict_link, &eviction_list); + } ret = 0; list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 9081e3bc5a59..2ad9070a54c1 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -491,9 +491,8 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass) stash_init(&vm->free_pages); - INIT_LIST_HEAD(&vm->active_list); - INIT_LIST_HEAD(&vm->inactive_list); INIT_LIST_HEAD(&vm->unbound_list); + INIT_LIST_HEAD(&vm->bound_list); } static void i915_address_space_fini(struct i915_address_space *vm) @@ -2111,8 +2110,7 @@ void i915_ppgtt_close(struct i915_address_space *vm) static void ppgtt_destroy_vma(struct i915_address_space *vm) { struct list_head *phases[] = { - &vm->active_list, - &vm->inactive_list, + &vm->bound_list, &vm->unbound_list, NULL, }, **phase; @@ -2135,8 +2133,7 @@ void i915_ppgtt_release(struct kref *kref) ppgtt_destroy_vma(&ppgtt->vm); - 
GEM_BUG_ON(!list_empty(&ppgtt->vm.active_list)); - GEM_BUG_ON(!list_empty(&ppgtt->vm.inactive_list)); + GEM_BUG_ON(!list_empty(&ppgtt->vm.bound_list)); GEM_BUG_ON(!list_empty(&ppgtt->vm.unbound_list)); ppgtt->vm.cleanup(&ppgtt->vm); @@ -2801,8 +2798,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); i915_gem_fini_aliasing_ppgtt(dev_priv); - GEM_BUG_ON(!list_empty(&ggtt->vm.active_list)); - list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link) + list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) WARN_ON(i915_vma_unbind(vma)); if (drm_mm_node_allocated(&ggtt->error_capture)) @@ -3514,8 +3510,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */ /* clflush objects bound into the GGTT and rebind them. */ - GEM_BUG_ON(!list_empty(&ggtt->vm.active_list)); - list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link) { + list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) { struct drm_i915_gem_object *obj = vma->obj; if (!(vma->flags & I915_VMA_GLOBAL_BIND)) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index a0039ea97cdc..bd679c8c56dd 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -299,32 +299,12 @@ struct i915_address_space { struct i915_page_directory_pointer *scratch_pdp; /* GEN8+ & 48b PPGTT */ /** - * List of objects currently involved in rendering. - * - * Includes buffers having the contents of their GPU caches - * flushed, not necessarily primitives. last_read_req - * represents when the rendering involved will be completed. - * - * A reference is held on the buffer while on this list. + * List of vma currently bound. */ - struct list_head active_list; + struct list_head bound_list; /** - * LRU list of objects which are not in the ringbuffer and - * are ready to unbind, but are still in the GTT. - * - * last_read_req is NULL while an object is in this list. - * - * A reference is not held on the buffer while on this list, - * as merely being GTT-bound shouldn't prevent its being - * freed, and we'll pull it off the list in the free path. - */ - struct list_head inactive_list; - - /** - * List of vma that have been unbound. - * - * A reference is not held on the buffer while on this list. + * List of vma that are not unbound. 
*/ struct list_head unbound_list; diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 8ceecb026910..a76d6c95c824 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -462,9 +462,13 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr /* We also want to clear any cached iomaps as they wrap vmap */ list_for_each_entry_safe(vma, next, - &i915->ggtt.vm.inactive_list, vm_link) { + &i915->ggtt.vm.bound_list, vm_link) { unsigned long count = vma->node.size >> PAGE_SHIFT; - if (vma->iomap && i915_vma_unbind(vma) == 0) + + if (!vma->iomap || i915_vma_is_active(vma)) + continue; + + if (i915_vma_unbind(vma) == 0) freed_pages += count; } diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 9df615eea2d8..a9e365789686 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -701,7 +701,8 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv vma->pages = obj->mm.pages; vma->flags |= I915_VMA_GLOBAL_BIND; __i915_vma_set_map_and_fenceable(vma); - list_move_tail(&vma->vm_link, &ggtt->vm.inactive_list); + + list_move_tail(&vma->vm_link, &ggtt->vm.bound_list); spin_lock(&dev_priv->mm.obj_lock); list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 4eef0462489c..898e06014295 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1121,7 +1121,9 @@ static void capture_bo(struct drm_i915_error_buffer *err, static u32 capture_error_bo(struct drm_i915_error_buffer *err, int count, struct list_head *head, - bool pinned_only) + unsigned int flags) +#define ACTIVE_ONLY BIT(0) +#define PINNED_ONLY BIT(1) { struct i915_vma *vma; int i = 0; @@ -1130,7 +1132,10 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err, if (!vma->obj) continue; - if (pinned_only && !i915_vma_is_pinned(vma)) + if (flags & ACTIVE_ONLY && !i915_vma_is_active(vma)) + continue; + + if (flags & PINNED_ONLY && !i915_vma_is_pinned(vma)) continue; capture_bo(err++, vma); @@ -1601,14 +1606,17 @@ static void gem_capture_vm(struct i915_gpu_state *error, int count; count = 0; - list_for_each_entry(vma, &vm->active_list, vm_link) - count++; + list_for_each_entry(vma, &vm->bound_list, vm_link) + if (i915_vma_is_active(vma)) + count++; active_bo = NULL; if (count) active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC); if (active_bo) - count = capture_error_bo(active_bo, count, &vm->active_list, false); + count = capture_error_bo(active_bo, + count, &vm->bound_list, + ACTIVE_ONLY); else count = 0; @@ -1646,28 +1654,20 @@ static void capture_pinned_buffers(struct i915_gpu_state *error) struct i915_address_space *vm = &error->i915->ggtt.vm; struct drm_i915_error_buffer *bo; struct i915_vma *vma; - int count_inactive, count_active; + int count; - count_inactive = 0; - list_for_each_entry(vma, &vm->inactive_list, vm_link) - count_inactive++; - - count_active = 0; - list_for_each_entry(vma, &vm->active_list, vm_link) - count_active++; + count = 0; + list_for_each_entry(vma, &vm->bound_list, vm_link) + count++; bo = NULL; - if (count_inactive + count_active) - bo = kcalloc(count_inactive + count_active, - sizeof(*bo), GFP_ATOMIC); + if (count) + bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC); if (!bo) return; - count_inactive = capture_error_bo(bo, 
count_inactive, - &vm->active_list, true); - count_active = capture_error_bo(bo + count_inactive, count_active, - &vm->inactive_list, true); - error->pinned_bo_count = count_inactive + count_active; + error->pinned_bo_count = + capture_error_bo(bo, count, &vm->bound_list, PINNED_ONLY); error->pinned_bo = bo; } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 5b4d78cdb4ca..7de28baffb8f 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -79,9 +79,6 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq) if (--vma->active_count) return; - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); - GEM_BUG_ON(!i915_gem_object_is_active(obj)); if (--obj->active_count) return; @@ -659,7 +656,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level)); - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + list_move_tail(&vma->vm_link, &vma->vm->bound_list); if (vma->obj) { struct drm_i915_gem_object *obj = vma->obj; @@ -1003,10 +1000,8 @@ int i915_vma_move_to_active(struct i915_vma *vma, * add the active reference first and queue for it to be dropped * *last*. */ - if (!i915_gem_active_isset(active) && !vma->active_count++) { - list_move_tail(&vma->vm_link, &vma->vm->active_list); + if (!i915_gem_active_isset(active) && !vma->active_count++) obj->active_count++; - } i915_gem_active_set(active, rq); GEM_BUG_ON(!i915_vma_is_active(vma)); GEM_BUG_ON(!obj->active_count); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index d0553bc69705..af9b85cb8639 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -84,7 +84,7 @@ static int populate_ggtt(struct drm_i915_private *i915, return -EINVAL; } - if (list_empty(&i915->ggtt.vm.inactive_list)) { + if (list_empty(&i915->ggtt.vm.bound_list)) { pr_err("No objects on the GGTT inactive list!\n"); return -EINVAL; } @@ -96,7 +96,7 @@ static void unpin_ggtt(struct drm_i915_private *i915) { struct i915_vma *vma; - list_for_each_entry(vma, &i915->ggtt.vm.inactive_list, vm_link) + list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link) if (vma->obj->mm.quirked) i915_vma_unpin(vma); } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 06bde4a273cb..8feb4af308ff 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1237,7 +1237,7 @@ static void track_vma_bind(struct i915_vma *vma) __i915_gem_object_pin_pages(obj); vma->pages = obj->mm.pages; - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + list_move_tail(&vma->vm_link, &vma->vm->bound_list); } static int exercise_mock(struct drm_i915_private *i915, From 09d7e46b97c663c9b7f5245871a8f19114e9148d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Jan 2019 10:23:53 +0000 Subject: [PATCH 39/89] drm/i915: Pull VM lists under the VM mutex. A starting point to counter the pervasive struct_mutex. For the goal of avoiding (or at least blocking under them!) global locks during user request submission, a simple but important step is being able to manage each client's GTT separately.
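The recurring pattern is small (sketched here; the precise lock sites are in the diff below): every walk or update of a VM's vma lists becomes bracketed by the per-VM mutex rather than struct_mutex:

    mutex_lock(&vm->mutex);
    list_move_tail(&vma->vm_link, &vm->bound_list);
    mutex_unlock(&vm->mutex);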
For which, we want to replace using the struct_mutex as the guard for all things GTT/VM and switch instead to a specific mutex inside i915_address_space. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190128102356.15037-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 14 ++++++++------ drivers/gpu/drm/i915/i915_gem_evict.c | 2 ++ drivers/gpu/drm/i915/i915_gem_gtt.c | 15 +++++++++++++-- drivers/gpu/drm/i915/i915_gem_shrinker.c | 4 ++++ drivers/gpu/drm/i915/i915_gem_stolen.c | 2 ++ drivers/gpu/drm/i915/i915_vma.c | 11 +++++++++++ drivers/gpu/drm/i915/selftests/i915_gem_evict.c | 3 +++ drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 3 +++ 8 files changed, 46 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f45186ddb236..538fa5404603 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -245,18 +245,19 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct i915_ggtt *ggtt = &to_i915(dev)->ggtt; struct drm_i915_gem_get_aperture *args = data; struct i915_vma *vma; u64 pinned; + mutex_lock(&ggtt->vm.mutex); + pinned = ggtt->vm.reserved; - mutex_lock(&dev->struct_mutex); list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) if (i915_vma_is_pinned(vma)) pinned += vma->node.size; - mutex_unlock(&dev->struct_mutex); + + mutex_unlock(&ggtt->vm.mutex); args->aper_size = ggtt->vm.total; args->aper_available_size = args->aper_size - pinned; @@ -1529,20 +1530,21 @@ err: static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915; + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct list_head *list; struct i915_vma *vma; GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + mutex_lock(&i915->ggtt.vm.mutex); for_each_ggtt_vma(vma, obj) { if (!drm_mm_node_allocated(&vma->node)) continue; list_move_tail(&vma->vm_link, &vma->vm->bound_list); } + mutex_unlock(&i915->ggtt.vm.mutex); - i915 = to_i915(obj->base.dev); spin_lock(&i915->mm.obj_lock); list = obj->bind_count ? 
&i915->mm.bound_list : &i915->mm.unbound_list; list_move_tail(&obj->mm.link, list); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index d76839670632..68d74c50ac39 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -430,6 +430,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm) } INIT_LIST_HEAD(&eviction_list); + mutex_lock(&vm->mutex); list_for_each_entry(vma, &vm->bound_list, vm_link) { if (i915_vma_is_pinned(vma)) continue; @@ -437,6 +438,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm) __i915_vma_pin(vma); list_add(&vma->evict_link, &eviction_list); } + mutex_unlock(&vm->mutex); ret = 0; list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 2ad9070a54c1..49b00996a15e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1931,7 +1931,10 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size) vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */ INIT_LIST_HEAD(&vma->obj_link); + + mutex_lock(&vma->vm->mutex); list_add(&vma->vm_link, &vma->vm->unbound_list); + mutex_unlock(&vma->vm->mutex); return vma; } @@ -3504,9 +3507,10 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) i915_check_and_clear_faults(dev_priv); + mutex_lock(&ggtt->vm.mutex); + /* First fill our portion of the GTT with scratch pages */ ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); - ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */ /* clflush objects bound into the GGTT and rebind them. */ @@ -3516,19 +3520,26 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) if (!(vma->flags & I915_VMA_GLOBAL_BIND)) continue; + mutex_unlock(&ggtt->vm.mutex); + if (!i915_vma_unbind(vma)) - continue; + goto lock; WARN_ON(i915_vma_bind(vma, obj ? 
obj->cache_level : 0, PIN_UPDATE)); if (obj) WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); + +lock: + mutex_lock(&ggtt->vm.mutex); } ggtt->vm.closed = false; i915_ggtt_invalidate(dev_priv); + mutex_unlock(&ggtt->vm.mutex); + if (INTEL_GEN(dev_priv) >= 8) { struct intel_ppat *ppat = &dev_priv->ppat; diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index a76d6c95c824..6da795c7e62e 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -461,6 +461,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr I915_SHRINK_VMAPS); /* We also want to clear any cached iomaps as they wrap vmap */ + mutex_lock(&i915->ggtt.vm.mutex); list_for_each_entry_safe(vma, next, &i915->ggtt.vm.bound_list, vm_link) { unsigned long count = vma->node.size >> PAGE_SHIFT; @@ -468,9 +469,12 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr if (!vma->iomap || i915_vma_is_active(vma)) continue; + mutex_unlock(&i915->ggtt.vm.mutex); if (i915_vma_unbind(vma) == 0) freed_pages += count; + mutex_lock(&i915->ggtt.vm.mutex); } + mutex_unlock(&i915->ggtt.vm.mutex); out: shrinker_unlock(i915, unlock); diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index a9e365789686..74a9661479ca 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -702,7 +702,9 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv vma->flags |= I915_VMA_GLOBAL_BIND; __i915_vma_set_map_and_fenceable(vma); + mutex_lock(&ggtt->vm.mutex); list_move_tail(&vma->vm_link, &ggtt->vm.bound_list); + mutex_unlock(&ggtt->vm.mutex); spin_lock(&dev_priv->mm.obj_lock); list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 7de28baffb8f..dcbd0d345c72 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -213,7 +213,10 @@ vma_create(struct drm_i915_gem_object *obj, } rb_link_node(&vma->obj_node, rb, p); rb_insert_color(&vma->obj_node, &obj->vma_tree); + + mutex_lock(&vm->mutex); list_add(&vma->vm_link, &vm->unbound_list); + mutex_unlock(&vm->mutex); return vma; @@ -656,7 +659,9 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level)); + mutex_lock(&vma->vm->mutex); list_move_tail(&vma->vm_link, &vma->vm->bound_list); + mutex_unlock(&vma->vm->mutex); if (vma->obj) { struct drm_i915_gem_object *obj = vma->obj; @@ -689,8 +694,10 @@ i915_vma_remove(struct i915_vma *vma) vma->ops->clear_pages(vma); + mutex_lock(&vma->vm->mutex); drm_mm_remove_node(&vma->node); list_move_tail(&vma->vm_link, &vma->vm->unbound_list); + mutex_unlock(&vma->vm->mutex); /* * Since the unbound list is global, only move to that list if @@ -802,7 +809,11 @@ static void __i915_vma_destroy(struct i915_vma *vma) GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence)); list_del(&vma->obj_link); + + mutex_lock(&vma->vm->mutex); list_del(&vma->vm_link); + mutex_unlock(&vma->vm->mutex); + if (vma->obj) rb_erase(&vma->obj_node, &vma->obj->vma_tree); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index af9b85cb8639..32dce7176f63 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ 
b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -94,11 +94,14 @@ static int populate_ggtt(struct drm_i915_private *i915, static void unpin_ggtt(struct drm_i915_private *i915) { + struct i915_ggtt *ggtt = &i915->ggtt; struct i915_vma *vma; + mutex_lock(&ggtt->vm.mutex); list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link) if (vma->obj->mm.quirked) i915_vma_unpin(vma); + mutex_unlock(&ggtt->vm.mutex); } static void cleanup_objects(struct drm_i915_private *i915, diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 8feb4af308ff..3850ef4a5ec8 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1237,7 +1237,10 @@ static void track_vma_bind(struct i915_vma *vma) __i915_gem_object_pin_pages(obj); vma->pages = obj->mm.pages; + + mutex_lock(&vma->vm->mutex); list_move_tail(&vma->vm_link, &vma->vm->bound_list); + mutex_unlock(&vma->vm->mutex); } static int exercise_mock(struct drm_i915_private *i915, From 528cbd17ceff070747a312c6312346b585495157 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Jan 2019 10:23:54 +0000 Subject: [PATCH 40/89] drm/i915: Move vma lookup to its own lock Remove the struct_mutex requirement for looking up the vma for an object. v2: Highlight how the race for duplicate vma creation is resolved on reacquiring the lock with a short comment. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190128102356.15037-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 6 +- drivers/gpu/drm/i915/i915_gem.c | 33 ++++++---- drivers/gpu/drm/i915/i915_gem_object.h | 45 ++++++++----- drivers/gpu/drm/i915/i915_vma.c | 80 +++++++++++++++-------- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/selftests/i915_vma.c | 4 +- 6 files changed, 105 insertions(+), 65 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index ecf762e24d0e..c9c230499420 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -159,14 +159,14 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) obj->mm.madv == I915_MADV_DONTNEED ? 
" purgeable" : ""); if (obj->base.name) seq_printf(m, " (name: %d)", obj->base.name); - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (i915_vma_is_pinned(vma)) pin_count++; } seq_printf(m, " (pinned x %d)", pin_count); if (obj->pin_global) seq_printf(m, " (global)"); - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -322,7 +322,7 @@ static int per_file_stats(int id, void *ptr, void *data) if (obj->base.name || obj->base.dma_buf) stats->shared += obj->base.size; - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 538fa5404603..15acd052da46 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -437,15 +437,19 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj) if (ret) return ret; - while ((vma = list_first_entry_or_null(&obj->vma_list, - struct i915_vma, - obj_link))) { + spin_lock(&obj->vma.lock); + while (!ret && (vma = list_first_entry_or_null(&obj->vma.list, + struct i915_vma, + obj_link))) { list_move_tail(&vma->obj_link, &still_in_list); + spin_unlock(&obj->vma.lock); + ret = i915_vma_unbind(vma); - if (ret) - break; + + spin_lock(&obj->vma.lock); } - list_splice(&still_in_list, &obj->vma_list); + list_splice(&still_in_list, &obj->vma.list); + spin_unlock(&obj->vma.lock); return ret; } @@ -3489,7 +3493,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, * reading an invalid PTE on older architectures. */ restart: - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -3567,7 +3571,7 @@ restart: */ } - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -3577,7 +3581,7 @@ restart: } } - list_for_each_entry(vma, &obj->vma_list, obj_link) + list_for_each_entry(vma, &obj->vma.list, obj_link) vma->node.color = cache_level; i915_gem_object_set_cache_coherency(obj, cache_level); obj->cache_dirty = true; /* Always invalidate stale cachelines */ @@ -4153,7 +4157,9 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, { mutex_init(&obj->mm.lock); - INIT_LIST_HEAD(&obj->vma_list); + spin_lock_init(&obj->vma.lock); + INIT_LIST_HEAD(&obj->vma.list); + INIT_LIST_HEAD(&obj->lut_list); INIT_LIST_HEAD(&obj->batch_pool_link); @@ -4319,14 +4325,13 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, mutex_lock(&i915->drm.struct_mutex); GEM_BUG_ON(i915_gem_object_is_active(obj)); - list_for_each_entry_safe(vma, vn, - &obj->vma_list, obj_link) { + list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) { GEM_BUG_ON(i915_vma_is_active(vma)); vma->flags &= ~I915_VMA_PIN_MASK; i915_vma_destroy(vma); } - GEM_BUG_ON(!list_empty(&obj->vma_list)); - GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); + GEM_BUG_ON(!list_empty(&obj->vma.list)); + GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree)); /* This serializes freeing with the shrinker. 
Since the free * is delayed, first by RCU then by the workqueue, we want the diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index cb1b0144d274..73fec917d097 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -87,24 +87,33 @@ struct drm_i915_gem_object { const struct drm_i915_gem_object_ops *ops; - /** - * @vma_list: List of VMAs backed by this object - * - * The VMA on this list are ordered by type, all GGTT vma are placed - * at the head and all ppGTT vma are placed at the tail. The different - * types of GGTT vma are unordered between themselves, use the - * @vma_tree (which has a defined order between all VMA) to find an - * exact match. - */ - struct list_head vma_list; - /** - * @vma_tree: Ordered tree of VMAs backed by this object - * - * All VMA created for this object are placed in the @vma_tree for - * fast retrieval via a binary search in i915_vma_instance(). - * They are also added to @vma_list for easy iteration. - */ - struct rb_root vma_tree; + struct { + /** + * @vma.lock: protect the list/tree of vmas + */ + spinlock_t lock; + + /** + * @vma.list: List of VMAs backed by this object + * + * The VMA on this list are ordered by type, all GGTT vma are + * placed at the head and all ppGTT vma are placed at the tail. + * The different types of GGTT vma are unordered between + * themselves, use the @vma.tree (which has a defined order + * between all VMA) to quickly find an exact match. + */ + struct list_head list; + + /** + * @vma.tree: Ordered tree of VMAs backed by this object + * + * All VMA created for this object are placed in the @vma.tree + * for fast retrieval via a binary search in + * i915_vma_instance(). They are also added to @vma.list for + * easy iteration. + */ + struct rb_root tree; + } vma; /** * @lut_list: List of vma lookup entries in use for this object. diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index dcbd0d345c72..d83b8ad5f859 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -187,32 +187,52 @@ vma_create(struct drm_i915_gem_object *obj, i915_gem_object_get_stride(obj)); GEM_BUG_ON(!is_power_of_2(vma->fence_alignment)); + vma->flags |= I915_VMA_GGTT; + } + + spin_lock(&obj->vma.lock); + + rb = NULL; + p = &obj->vma.tree.rb_node; + while (*p) { + struct i915_vma *pos; + long cmp; + + rb = *p; + pos = rb_entry(rb, struct i915_vma, obj_node); + + /* + * If the view already exists in the tree, another thread + * already created a matching vma, so return the older instance + * and dispose of ours. + */ + cmp = i915_vma_compare(pos, vm, view); + if (cmp == 0) { + spin_unlock(&obj->vma.lock); + kmem_cache_free(vm->i915->vmas, vma); + return pos; + } + + if (cmp < 0) + p = &rb->rb_right; + else + p = &rb->rb_left; + } + rb_link_node(&vma->obj_node, rb, p); + rb_insert_color(&vma->obj_node, &obj->vma.tree); + + if (i915_vma_is_ggtt(vma)) /* * We put the GGTT vma at the start of the vma-list, followed * by the ppGGTT vma. 
This allows us to break early when * iterating over only the GGTT vma for an object, see * for_each_ggtt_vma() */ - vma->flags |= I915_VMA_GGTT; - list_add(&vma->obj_link, &obj->vma_list); - } else { - list_add_tail(&vma->obj_link, &obj->vma_list); - } + list_add(&vma->obj_link, &obj->vma.list); + else + list_add_tail(&vma->obj_link, &obj->vma.list); - rb = NULL; - p = &obj->vma_tree.rb_node; - while (*p) { - struct i915_vma *pos; - - rb = *p; - pos = rb_entry(rb, struct i915_vma, obj_node); - if (i915_vma_compare(pos, vm, view) < 0) - p = &rb->rb_right; - else - p = &rb->rb_left; - } - rb_link_node(&vma->obj_node, rb, p); - rb_insert_color(&vma->obj_node, &obj->vma_tree); + spin_unlock(&obj->vma.lock); mutex_lock(&vm->mutex); list_add(&vma->vm_link, &vm->unbound_list); @@ -232,7 +252,7 @@ vma_lookup(struct drm_i915_gem_object *obj, { struct rb_node *rb; - rb = obj->vma_tree.rb_node; + rb = obj->vma.tree.rb_node; while (rb) { struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node); long cmp; @@ -272,16 +292,18 @@ i915_vma_instance(struct drm_i915_gem_object *obj, { struct i915_vma *vma; - lockdep_assert_held(&obj->base.dev->struct_mutex); GEM_BUG_ON(view && !i915_is_ggtt(vm)); GEM_BUG_ON(vm->closed); + spin_lock(&obj->vma.lock); vma = vma_lookup(obj, vm, view); - if (!vma) + spin_unlock(&obj->vma.lock); + + /* vma_create() will resolve the race if another creates the vma */ + if (unlikely(!vma)) vma = vma_create(obj, vm, view); GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view)); - GEM_BUG_ON(!IS_ERR(vma) && vma_lookup(obj, vm, view) != vma); return vma; } @@ -808,14 +830,18 @@ static void __i915_vma_destroy(struct i915_vma *vma) GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence)); - list_del(&vma->obj_link); - mutex_lock(&vma->vm->mutex); list_del(&vma->vm_link); mutex_unlock(&vma->vm->mutex); - if (vma->obj) - rb_erase(&vma->obj_node, &vma->obj->vma_tree); + if (vma->obj) { + struct drm_i915_gem_object *obj = vma->obj; + + spin_lock(&obj->vma.lock); + list_del(&vma->obj_link); + rb_erase(&vma->obj_node, &vma->obj->vma.tree); + spin_unlock(&obj->vma.lock); + } rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) { GEM_BUG_ON(i915_gem_active_isset(&iter->base)); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 4f7c1c7599f4..7252abc73d3e 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -425,7 +425,7 @@ void i915_vma_parked(struct drm_i915_private *i915); * or the list is empty ofc. 
*/ #define for_each_ggtt_vma(V, OBJ) \ - list_for_each_entry(V, &(OBJ)->vma_list, obj_link) \ + list_for_each_entry(V, &(OBJ)->vma.list, obj_link) \ for_each_until(!i915_vma_is_ggtt(V)) #endif diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index f0a32edfb9b1..cf1de82741fa 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -672,7 +672,7 @@ static int igt_vma_partial(void *arg) } count = 0; - list_for_each_entry(vma, &obj->vma_list, obj_link) + list_for_each_entry(vma, &obj->vma.list, obj_link) count++; if (count != nvma) { pr_err("(%s) All partial vma were not recorded on the obj->vma_list: found %u, expected %u\n", @@ -701,7 +701,7 @@ static int igt_vma_partial(void *arg) i915_vma_unpin(vma); count = 0; - list_for_each_entry(vma, &obj->vma_list, obj_link) + list_for_each_entry(vma, &obj->vma.list, obj_link) count++; if (count != nvma) { pr_err("(%s) allocated an extra full vma!\n", p->name); From 0ca88ba0d6347cf8c4ea9f264c384594f8fefa11 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Jan 2019 10:23:55 +0000 Subject: [PATCH 41/89] drm/i915: Always allocate an object/vma for the HWSP Currently we only allocate an object and vma if we are using a GGTT virtual HWSP, and a plain struct page for a physical HWSP. For convenience later on with global timelines, it will be useful to always have the status page being tracked by a struct i915_vma. Make it so. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190128102356.15037-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 109 ++++++++++--------- drivers/gpu/drm/i915/intel_guc_submission.c | 6 + drivers/gpu/drm/i915/intel_lrc.c | 12 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 21 +++- drivers/gpu/drm/i915/intel_ringbuffer.h | 23 +--- drivers/gpu/drm/i915/selftests/mock_engine.c | 2 +- 6 files changed, 93 insertions(+), 80 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 1a5c163b98d6..2657eb6fd914 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -506,27 +506,61 @@ void intel_engine_setup_common(struct intel_engine_cs *engine) static void cleanup_status_page(struct intel_engine_cs *engine) { + struct i915_vma *vma; + /* Prevent writes into HWSP after returning the page to the system */ intel_engine_set_hwsp_writemask(engine, ~0u); - if (HWS_NEEDS_PHYSICAL(engine->i915)) { - void *addr = fetch_and_zero(&engine->status_page.page_addr); + vma = fetch_and_zero(&engine->status_page.vma); + if (!vma) + return; - __free_page(virt_to_page(addr)); - } + if (!HWS_NEEDS_PHYSICAL(engine->i915)) + i915_vma_unpin(vma); - i915_vma_unpin_and_release(&engine->status_page.vma, - I915_VMA_RELEASE_MAP); + i915_gem_object_unpin_map(vma->obj); + __i915_gem_object_release_unless_active(vma->obj); +} + +static int pin_ggtt_status_page(struct intel_engine_cs *engine, + struct i915_vma *vma) +{ + unsigned int flags; + + flags = PIN_GLOBAL; + if (!HAS_LLC(engine->i915)) + /* + * On g33, we cannot place HWS above 256MiB, so + * restrict its pinning to the low mappable arena. + * Though this restriction is not documented for + * gen4, gen5, or byt, they also behave similarly + * and hang if the HWS is placed at the top of the + * GTT. 
To generalise, it appears that all !llc + * platforms have issues with us placing the HWS + * above the mappable region (even though we never + * actually map it). + */ + flags |= PIN_MAPPABLE; + else + flags |= PIN_HIGH; + + return i915_vma_pin(vma, 0, 0, flags); } static int init_status_page(struct intel_engine_cs *engine) { struct drm_i915_gem_object *obj; struct i915_vma *vma; - unsigned int flags; void *vaddr; int ret; + /* + * Though the HWS register does support 36bit addresses, historically + * we have had hangs and corruption reported due to wild writes if + * the HWS is placed above 4G. We only allow objects to be allocated + * in GFP_DMA32 for i965, and no earlier physical address users had + * access to more than 4G. + */ obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); if (IS_ERR(obj)) { DRM_ERROR("Failed to allocate status page\n"); @@ -543,61 +577,30 @@ static int init_status_page(struct intel_engine_cs *engine) goto err; } - flags = PIN_GLOBAL; - if (!HAS_LLC(engine->i915)) - /* On g33, we cannot place HWS above 256MiB, so - * restrict its pinning to the low mappable arena. - * Though this restriction is not documented for - * gen4, gen5, or byt, they also behave similarly - * and hang if the HWS is placed at the top of the - * GTT. To generalise, it appears that all !llc - * platforms have issues with us placing the HWS - * above the mappable region (even though we never - * actually map it). - */ - flags |= PIN_MAPPABLE; - else - flags |= PIN_HIGH; - ret = i915_vma_pin(vma, 0, 0, flags); - if (ret) - goto err; - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); if (IS_ERR(vaddr)) { ret = PTR_ERR(vaddr); - goto err_unpin; + goto err; } + engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE); engine->status_page.vma = vma; - engine->status_page.ggtt_offset = i915_ggtt_offset(vma); - engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE); + + if (!HWS_NEEDS_PHYSICAL(engine->i915)) { + ret = pin_ggtt_status_page(engine, vma); + if (ret) + goto err_unpin; + } + return 0; err_unpin: - i915_vma_unpin(vma); + i915_gem_object_unpin_map(obj); err: i915_gem_object_put(obj); return ret; } -static int init_phys_status_page(struct intel_engine_cs *engine) -{ - struct page *page; - - /* - * Though the HWS register does support 36bit addresses, historically - * we have had hangs and corruption reported due to wild writes if - * the HWS is placed above 4G. 
- */ - page = alloc_page(GFP_KERNEL | __GFP_DMA32 | __GFP_ZERO); - if (!page) - return -ENOMEM; - - engine->status_page.page_addr = page_address(page); - - return 0; -} - static void __intel_context_unpin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { @@ -690,10 +693,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine) if (ret) goto err_unpin_preempt; - if (HWS_NEEDS_PHYSICAL(i915)) - ret = init_phys_status_page(engine); - else - ret = init_status_page(engine); + ret = init_status_page(engine); if (ret) goto err_breadcrumbs; @@ -1366,7 +1366,8 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, } if (HAS_EXECLISTS(dev_priv)) { - const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; + const u32 *hws = + &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; unsigned int idx; u8 read, write; @@ -1549,7 +1550,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, spin_unlock_irqrestore(&b->rb_lock, flags); drm_printf(m, "HWSP:\n"); - hexdump(m, engine->status_page.page_addr, PAGE_SIZE); + hexdump(m, engine->status_page.addr, PAGE_SIZE); drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine))); } diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 45e2db683fe5..4295ade0d613 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -81,6 +81,12 @@ * */ +static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine) +{ + return (i915_ggtt_offset(engine->status_page.vma) + + I915_GEM_HWS_PREEMPT_ADDR); +} + static inline struct i915_priolist *to_priolist(struct rb_node *rb) { return rb_entry(rb, struct i915_priolist, node); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 185867106c14..2cf99c436658 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -172,6 +172,12 @@ static void execlists_init_reg_state(u32 *reg_state, struct intel_engine_cs *engine, struct intel_ring *ring); +static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) +{ + return (i915_ggtt_offset(engine->status_page.vma) + + I915_GEM_HWS_INDEX_ADDR); +} + static inline struct i915_priolist *to_priolist(struct rb_node *rb) { return rb_entry(rb, struct i915_priolist, node); @@ -1699,7 +1705,7 @@ static void enable_execlists(struct intel_engine_cs *engine) _MASKED_BIT_DISABLE(STOP_RING)); I915_WRITE(RING_HWS_PGA(engine->mmio_base), - engine->status_page.ggtt_offset); + i915_ggtt_offset(engine->status_page.vma)); POSTING_READ(RING_HWS_PGA(engine->mmio_base)); } @@ -2244,10 +2250,10 @@ static int logical_ring_init(struct intel_engine_cs *engine) } execlists->csb_status = - &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; + &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; execlists->csb_write = - &engine->status_page.page_addr[intel_hws_csb_write_index(i915)]; + &engine->status_page.addr[intel_hws_csb_write_index(i915)]; reset_csb_pointers(execlists); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index a9efc8c71254..cb6d2aa2a829 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -43,6 +43,12 @@ */ #define LEGACY_REQUEST_SIZE 200 +static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) +{ + return (i915_ggtt_offset(engine->status_page.vma) + + I915_GEM_HWS_INDEX_ADDR); +} + static unsigned int 
__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) @@ -503,12 +509,17 @@ static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys) I915_WRITE(HWS_PGA, addr); } +static struct page *status_page(struct intel_engine_cs *engine) +{ + struct drm_i915_gem_object *obj = engine->status_page.vma->obj; + + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + return sg_page(obj->mm.pages->sgl); +} + static void ring_setup_phys_status_page(struct intel_engine_cs *engine) { - struct page *page = virt_to_page(engine->status_page.page_addr); - phys_addr_t phys = PFN_PHYS(page_to_pfn(page)); - - set_hws_pga(engine, phys); + set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine)))); set_hwstam(engine, ~0u); } @@ -575,7 +586,7 @@ static void flush_cs_tlb(struct intel_engine_cs *engine) static void ring_setup_status_page(struct intel_engine_cs *engine) { - set_hwsp(engine, engine->status_page.ggtt_offset); + set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma)); set_hwstam(engine, ~0u); flush_cs_tlb(engine); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index f2effd001540..32371ae67f24 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -32,8 +32,7 @@ struct i915_sched_attr; struct intel_hw_status_page { struct i915_vma *vma; - u32 *page_addr; - u32 ggtt_offset; + u32 *addr; }; #define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base)) @@ -671,7 +670,7 @@ static inline u32 intel_read_status_page(const struct intel_engine_cs *engine, int reg) { /* Ensure that the compiler doesn't optimize away the load. */ - return READ_ONCE(engine->status_page.page_addr[reg]); + return READ_ONCE(engine->status_page.addr[reg]); } static inline void @@ -684,12 +683,12 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) */ if (static_cpu_has(X86_FEATURE_CLFLUSH)) { mb(); - clflush(&engine->status_page.page_addr[reg]); - engine->status_page.page_addr[reg] = value; - clflush(&engine->status_page.page_addr[reg]); + clflush(&engine->status_page.addr[reg]); + engine->status_page.addr[reg] = value; + clflush(&engine->status_page.addr[reg]); mb(); } else { - WRITE_ONCE(engine->status_page.page_addr[reg], value); + WRITE_ONCE(engine->status_page.addr[reg], value); } } @@ -877,16 +876,6 @@ static inline bool intel_engine_has_started(struct intel_engine_cs *engine, void intel_engine_get_instdone(struct intel_engine_cs *engine, struct intel_instdone *instdone); -static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) -{ - return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR; -} - -static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine) -{ - return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR; -} - /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 905318b7ae18..4e5b4dc6df0f 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -200,7 +200,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.i915 = i915; snprintf(engine->base.name, sizeof(engine->base.name), "%s", name); engine->base.id = id; - engine->base.status_page.page_addr = (void *)(engine + 1); + engine->base.status_page.addr = (void *)(engine 
+ 1); engine->base.context_pin = mock_context_pin; engine->base.request_alloc = mock_request_alloc; From 1e345568e3b541e19202caadae8d2cb2237e7ed8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Jan 2019 10:23:56 +0000 Subject: [PATCH 42/89] drm/i915: Move list of timelines under its own lock Currently, the list of timelines is serialised by the struct_mutex, but to alleviate difficulties with using that mutex in future, move the list management under its own dedicated mutex. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190128102356.15037-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 5 +- drivers/gpu/drm/i915/i915_gem.c | 103 ++++++++++-------- drivers/gpu/drm/i915/i915_reset.c | 8 +- drivers/gpu/drm/i915/i915_timeline.c | 38 ++++++- drivers/gpu/drm/i915/i915_timeline.h | 3 + .../gpu/drm/i915/selftests/mock_gem_device.c | 7 +- .../gpu/drm/i915/selftests/mock_timeline.c | 3 +- 7 files changed, 109 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0133d1da3d3c..8a181b455197 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1975,7 +1975,10 @@ struct drm_i915_private { void (*resume)(struct drm_i915_private *); void (*cleanup_engine)(struct intel_engine_cs *engine); - struct list_head timelines; + struct i915_gt_timelines { + struct mutex mutex; /* protects list, tainted by GPU */ + struct list_head list; + } timelines; struct list_head active_rings; struct list_head closed_vma; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 15acd052da46..761714448ff3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3222,33 +3222,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) return ret; } -static long wait_for_timeline(struct i915_timeline *tl, - unsigned int flags, long timeout) -{ - struct i915_request *rq; - - rq = i915_gem_active_get_unlocked(&tl->last_request); - if (!rq) - return timeout; - - /* - * "Race-to-idle". - * - * Switching to the kernel context is often used a synchronous - * step prior to idling, e.g. in suspend for flushing all - * current operations to memory before sleeping. These we - * want to complete as quickly as possible to avoid prolonged - * stalls, so allow the gpu to boost to maximum clocks. - */ - if (flags & I915_WAIT_FOR_IDLE_BOOST) - gen6_rps_boost(rq, NULL); - - timeout = i915_request_wait(rq, flags, timeout); - i915_request_put(rq); - - return timeout; -} - static int wait_for_engines(struct drm_i915_private *i915) { if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { @@ -3262,6 +3235,52 @@ static int wait_for_engines(struct drm_i915_private *i915) return 0; } +static long +wait_for_timelines(struct drm_i915_private *i915, + unsigned int flags, long timeout) +{ + struct i915_gt_timelines *gt = &i915->gt.timelines; + struct i915_timeline *tl; + + if (!READ_ONCE(i915->gt.active_requests)) + return timeout; + + mutex_lock(>->mutex); + list_for_each_entry(tl, >->list, link) { + struct i915_request *rq; + + rq = i915_gem_active_get_unlocked(&tl->last_request); + if (!rq) + continue; + + mutex_unlock(>->mutex); + + /* + * "Race-to-idle". + * + * Switching to the kernel context is often used a synchronous + * step prior to idling, e.g. in suspend for flushing all + * current operations to memory before sleeping. 
These we + * want to complete as quickly as possible to avoid prolonged + * stalls, so allow the gpu to boost to maximum clocks. + */ + if (flags & I915_WAIT_FOR_IDLE_BOOST) + gen6_rps_boost(rq, NULL); + + timeout = i915_request_wait(rq, flags, timeout); + i915_request_put(rq); + if (timeout < 0) + return timeout; + + /* restart after reacquiring the lock */ + mutex_lock(>->mutex); + tl = list_entry(>->list, typeof(*tl), link); + } + mutex_unlock(>->mutex); + + return timeout; +} + int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags, long timeout) { @@ -3273,17 +3292,15 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, if (!READ_ONCE(i915->gt.awake)) return 0; + timeout = wait_for_timelines(i915, flags, timeout); + if (timeout < 0) + return timeout; + if (flags & I915_WAIT_LOCKED) { - struct i915_timeline *tl; int err; lockdep_assert_held(&i915->drm.struct_mutex); - list_for_each_entry(tl, &i915->gt.timelines, link) { - timeout = wait_for_timeline(tl, flags, timeout); - if (timeout < 0) - return timeout; - } if (GEM_SHOW_DEBUG() && !timeout) { /* Presume that timeout was non-zero to begin with! */ dev_warn(&i915->drm.pdev->dev, @@ -3297,17 +3314,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, i915_retire_requests(i915); GEM_BUG_ON(i915->gt.active_requests); - } else { - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, i915, id) { - struct i915_timeline *tl = &engine->timeline; - - timeout = wait_for_timeline(tl, flags, timeout); - if (timeout < 0) - return timeout; - } } return 0; @@ -5008,6 +5014,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv) dev_priv->gt.cleanup_engine = intel_engine_cleanup; } + i915_timelines_init(dev_priv); + ret = i915_gem_init_userptr(dev_priv); if (ret) return ret; @@ -5130,8 +5138,10 @@ err_unlock: err_uc_misc: intel_uc_fini_misc(dev_priv); - if (ret != -EIO) + if (ret != -EIO) { i915_gem_cleanup_userptr(dev_priv); + i915_timelines_fini(dev_priv); + } if (ret == -EIO) { mutex_lock(&dev_priv->drm.struct_mutex); @@ -5182,6 +5192,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv) intel_uc_fini_misc(dev_priv); i915_gem_cleanup_userptr(dev_priv); + i915_timelines_fini(dev_priv); i915_gem_drain_freed_objects(dev_priv); @@ -5284,7 +5295,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) if (!dev_priv->priorities) goto err_dependencies; - INIT_LIST_HEAD(&dev_priv->gt.timelines); INIT_LIST_HEAD(&dev_priv->gt.active_rings); INIT_LIST_HEAD(&dev_priv->gt.closed_vma); @@ -5328,7 +5338,6 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); WARN_ON(dev_priv->mm.object_count); - WARN_ON(!list_empty(&dev_priv->gt.timelines)); kmem_cache_destroy(dev_priv->priorities); kmem_cache_destroy(dev_priv->dependencies); diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index 99bd3bc336b3..d2dca85a543d 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -854,7 +854,8 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) * * No more can be submitted until we reset the wedged bit. 
*/ - list_for_each_entry(tl, &i915->gt.timelines, link) { + mutex_lock(&i915->gt.timelines.mutex); + list_for_each_entry(tl, &i915->gt.timelines.list, link) { struct i915_request *rq; long timeout; @@ -876,9 +877,12 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) timeout = dma_fence_default_wait(&rq->fence, true, MAX_SCHEDULE_TIMEOUT); i915_request_put(rq); - if (timeout < 0) + if (timeout < 0) { + mutex_unlock(&i915->gt.timelines.mutex); goto unlock; + } } + mutex_unlock(&i915->gt.timelines.mutex); intel_engines_sanitize(i915, false); diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c index 4667cc08c416..84550f17d3df 100644 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -13,7 +13,7 @@ void i915_timeline_init(struct drm_i915_private *i915, struct i915_timeline *timeline, const char *name) { - lockdep_assert_held(&i915->drm.struct_mutex); + struct i915_gt_timelines *gt = &i915->gt.timelines; /* * Ideally we want a set of engines on a single leaf as we expect @@ -23,9 +23,12 @@ void i915_timeline_init(struct drm_i915_private *i915, */ BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); + timeline->i915 = i915; timeline->name = name; - list_add(&timeline->link, &i915->gt.timelines); + mutex_lock(>->mutex); + list_add(&timeline->link, >->list); + mutex_unlock(>->mutex); /* Called during early_init before we know how many engines there are */ @@ -39,6 +42,17 @@ void i915_timeline_init(struct drm_i915_private *i915, i915_syncmap_init(&timeline->sync); } +void i915_timelines_init(struct drm_i915_private *i915) +{ + struct i915_gt_timelines *gt = &i915->gt.timelines; + + mutex_init(>->mutex); + INIT_LIST_HEAD(>->list); + + /* via i915_gem_wait_for_idle() */ + i915_gem_shrinker_taints_mutex(i915, >->mutex); +} + /** * i915_timelines_park - called when the driver idles * @i915: the drm_i915_private device @@ -51,11 +65,11 @@ void i915_timeline_init(struct drm_i915_private *i915, */ void i915_timelines_park(struct drm_i915_private *i915) { + struct i915_gt_timelines *gt = &i915->gt.timelines; struct i915_timeline *timeline; - lockdep_assert_held(&i915->drm.struct_mutex); - - list_for_each_entry(timeline, &i915->gt.timelines, link) { + mutex_lock(>->mutex); + list_for_each_entry(timeline, >->list, link) { /* * All known fences are completed so we can scrap * the current sync point tracking and start afresh, @@ -64,15 +78,20 @@ void i915_timelines_park(struct drm_i915_private *i915) */ i915_syncmap_free(&timeline->sync); } + mutex_unlock(>->mutex); } void i915_timeline_fini(struct i915_timeline *timeline) { + struct i915_gt_timelines *gt = &timeline->i915->gt.timelines; + GEM_BUG_ON(!list_empty(&timeline->requests)); i915_syncmap_free(&timeline->sync); + mutex_lock(>->mutex); list_del(&timeline->link); + mutex_unlock(>->mutex); } struct i915_timeline * @@ -99,6 +118,15 @@ void __i915_timeline_free(struct kref *kref) kfree(timeline); } +void i915_timelines_fini(struct drm_i915_private *i915) +{ + struct i915_gt_timelines *gt = &i915->gt.timelines; + + GEM_BUG_ON(!list_empty(>->list)); + + mutex_destroy(>->mutex); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_timeline.c" #include "selftests/i915_timeline.c" diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index 38c1e15e927a..87ad2dd31c20 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -66,6 +66,7 @@ struct i915_timeline { struct list_head link; const char 
*name; + struct drm_i915_private *i915; struct kref kref; }; @@ -134,6 +135,8 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl, return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno); } +void i915_timelines_init(struct drm_i915_private *i915); void i915_timelines_park(struct drm_i915_private *i915); +void i915_timelines_fini(struct drm_i915_private *i915); #endif diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 8ab5a2688a0c..14ae46fda49f 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -68,13 +68,14 @@ static void mock_device_release(struct drm_device *dev) i915_gem_contexts_fini(i915); mutex_unlock(&i915->drm.struct_mutex); + i915_timelines_fini(i915); + drain_workqueue(i915->wq); i915_gem_drain_freed_objects(i915); mutex_lock(&i915->drm.struct_mutex); mock_fini_ggtt(&i915->ggtt); mutex_unlock(&i915->drm.struct_mutex); - WARN_ON(!list_empty(&i915->gt.timelines)); destroy_workqueue(i915->wq); @@ -226,7 +227,8 @@ struct drm_i915_private *mock_gem_device(void) if (!i915->priorities) goto err_dependencies; - INIT_LIST_HEAD(&i915->gt.timelines); + i915_timelines_init(i915); + INIT_LIST_HEAD(&i915->gt.active_rings); INIT_LIST_HEAD(&i915->gt.closed_vma); @@ -253,6 +255,7 @@ err_context: i915_gem_contexts_fini(i915); err_unlock: mutex_unlock(&i915->drm.struct_mutex); + i915_timelines_fini(i915); kmem_cache_destroy(i915->priorities); err_dependencies: kmem_cache_destroy(i915->dependencies); diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c index dcf3b16f5a07..cf39ccd9fc05 100644 --- a/drivers/gpu/drm/i915/selftests/mock_timeline.c +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c @@ -10,6 +10,7 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context) { + timeline->i915 = NULL; timeline->fence_context = context; spin_lock_init(&timeline->lock); @@ -24,5 +25,5 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context) void mock_timeline_fini(struct i915_timeline *timeline) { - i915_timeline_fini(timeline); + i915_syncmap_free(&timeline->sync); } From 3adac4689f58cb3fb666d92dff0ee73cc97d24d7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Jan 2019 18:18:07 +0000 Subject: [PATCH 43/89] drm/i915: Introduce concept of per-timeline (context) HWSP Supplement the per-engine HWSP with a per-timeline HWSP. That is a per-request pointer through which we can check a local seqno, abstracting away the presumption of a global seqno. In this first step, we point each request back into the engine's HWSP so everything continues to work with the global timeline. v2: s/i915_request_hwsp/hwsp_seqno/ to emphasise that this is the current HW value and that we are accessing it via i915_request merely as a convenience.
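To make the v2 naming concrete, a minimal sketch condensed from the __hwsp_seqno()/hwsp_seqno() helpers and i915_seqno_passed() as used below (the wrapper name rq_hw_seqno_passed is hypothetical):

	static bool rq_hw_seqno_passed(const struct i915_request *rq, u32 seqno)
	{
		u32 hwsp;

		rcu_read_lock(); /* the HWSP may be freed at runtime */
		hwsp = READ_ONCE(*rq->hwsp_seqno); /* current HW value, not a property of rq */
		rcu_read_unlock();

		return (s32)(hwsp - seqno) >= 0; /* wrap-safe breadcrumb comparison */
	}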
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20190128181812.22804-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 16 ++++++---- drivers/gpu/drm/i915/i915_request.h | 45 ++++++++++++++++++++++++----- drivers/gpu/drm/i915/intel_lrc.c | 9 ++++-- 3 files changed, 55 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index f4241a17e2ad..a076fd0b7ba6 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -182,10 +182,11 @@ static void free_capture_list(struct i915_request *request) static void __retire_engine_request(struct intel_engine_cs *engine, struct i915_request *rq) { - GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d\n", + GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d:%d\n", __func__, engine->name, rq->fence.context, rq->fence.seqno, rq->global_seqno, + hwsp_seqno(rq), intel_engine_get_seqno(engine)); GEM_BUG_ON(!i915_request_completed(rq)); @@ -244,10 +245,11 @@ static void i915_request_retire(struct i915_request *request) { struct i915_gem_active *active, *next; - GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n", request->engine->name, request->fence.context, request->fence.seqno, request->global_seqno, + hwsp_seqno(request), intel_engine_get_seqno(request->engine)); lockdep_assert_held(&request->i915->drm.struct_mutex); @@ -307,10 +309,11 @@ void i915_request_retire_upto(struct i915_request *rq) struct intel_ring *ring = rq->ring; struct i915_request *tmp; - GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n", rq->engine->name, rq->fence.context, rq->fence.seqno, rq->global_seqno, + hwsp_seqno(rq), intel_engine_get_seqno(rq->engine)); lockdep_assert_held(&rq->i915->drm.struct_mutex); @@ -355,10 +358,11 @@ void __i915_request_submit(struct i915_request *request) struct intel_engine_cs *engine = request->engine; u32 seqno; - GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d:%d\n", engine->name, request->fence.context, request->fence.seqno, engine->timeline.seqno + 1, + hwsp_seqno(request), intel_engine_get_seqno(engine)); GEM_BUG_ON(!irqs_disabled()); @@ -405,10 +409,11 @@ void __i915_request_unsubmit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d:%d\n", engine->name, request->fence.context, request->fence.seqno, request->global_seqno, + hwsp_seqno(request), intel_engine_get_seqno(engine)); GEM_BUG_ON(!irqs_disabled()); @@ -616,6 +621,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) rq->ring = ce->ring; rq->timeline = ce->ring->timeline; GEM_BUG_ON(rq->timeline == &engine->timeline); + rq->hwsp_seqno = &engine->status_page.addr[I915_GEM_HWS_INDEX]; spin_lock_init(&rq->lock); dma_fence_init(&rq->fence, diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index c0f084ca4f29..ade010fe6e26 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -130,6 +130,13 @@ struct i915_request { struct i915_sched_node sched; struct i915_dependency dep; + /* + * A convenience pointer to the current breadcrumb value 
stored in + * the HW status page (or our timeline's local equivalent). The full + * path would be rq->hw_context->ring->timeline->hwsp_seqno. + */ + const u32 *hwsp_seqno; + /** * GEM sequence number associated with this request on the * global execution timeline. It is zero when the request is not @@ -285,11 +292,6 @@ static inline bool i915_request_signaled(const struct i915_request *rq) return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags); } -static inline bool intel_engine_has_started(struct intel_engine_cs *engine, - u32 seqno); -static inline bool intel_engine_has_completed(struct intel_engine_cs *engine, - u32 seqno); - /** * Returns true if seq1 is later than seq2. */ @@ -298,6 +300,35 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2) return (s32)(seq1 - seq2) >= 0; } +static inline u32 __hwsp_seqno(const struct i915_request *rq) +{ + return READ_ONCE(*rq->hwsp_seqno); +} + +/** + * hwsp_seqno - the current breadcrumb value in the HW status page + * @rq: the request, to chase the relevant HW status page + * + * The emphasis in naming here is that hwsp_seqno() is not a property of the + * request, but an indication of the current HW state (associated with this + * request). Its value will change as the GPU executes more requests. + * + * Returns the current breadcrumb value in the associated HW status page (or + * the local timeline's equivalent) for this request. The request itself + * has the associated breadcrumb value of rq->fence.seqno, when the HW + * status page has that breadcrumb or later, this request is complete. + */ +static inline u32 hwsp_seqno(const struct i915_request *rq) +{ + u32 seqno; + + rcu_read_lock(); /* the HWSP may be freed at runtime */ + seqno = __hwsp_seqno(rq); + rcu_read_unlock(); + + return seqno; +} + /** * i915_request_started - check if the request has begun being executed * @rq: the request @@ -315,14 +346,14 @@ static inline bool i915_request_started(const struct i915_request *rq) if (!seqno) /* not yet submitted to HW */ return false; - return intel_engine_has_started(rq->engine, seqno); + return i915_seqno_passed(hwsp_seqno(rq), seqno - 1); } static inline bool __i915_request_completed(const struct i915_request *rq, u32 seqno) { GEM_BUG_ON(!seqno); - return intel_engine_has_completed(rq->engine, seqno) && + return i915_seqno_passed(hwsp_seqno(rq), seqno) && seqno == i915_request_global_seqno(rq); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 2cf99c436658..9ae7f77293a0 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -446,11 +446,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = execlists_update_context(rq); GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); - GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", + GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n", engine->name, n, port[n].context_id, count, rq->global_seqno, rq->fence.context, rq->fence.seqno, + hwsp_seqno(rq), intel_engine_get_seqno(engine), rq_prio(rq)); } else { @@ -742,11 +743,12 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) while (num_ports-- && port_isset(port)) { struct i915_request *rq = port_request(port); - GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d)\n", + GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d:%d)\n", rq->engine->name, (unsigned int)(port - execlists->port), rq->global_seqno, 
rq->fence.context, rq->fence.seqno, + hwsp_seqno(rq), intel_engine_get_seqno(rq->engine)); GEM_BUG_ON(!execlists->active); @@ -970,12 +972,13 @@ static void process_csb(struct intel_engine_cs *engine) EXECLISTS_ACTIVE_USER)); rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", + GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n", engine->name, port->context_id, count, rq ? rq->global_seqno : 0, rq ? rq->fence.context : 0, rq ? rq->fence.seqno : 0, + rq ? hwsp_seqno(rq) : 0, intel_engine_get_seqno(engine), rq ? rq_prio(rq) : 0); From b18fe4be59f215b1ce75f406d04810454f206faf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Jan 2019 18:18:08 +0000 Subject: [PATCH 44/89] drm/i915: Enlarge vma->pin_count Previously we only accommodated having a vma pinned by a small number of users, with the maximum being pinned for use by the display engine. As such, we used a small bitfield only large enough to allow the vma to be pinned twice (for back/front buffers) in each scanout plane. Keeping the maximum permissible pin_count small allows us to quickly catch a potential leak. However, as we want to split a 4096B page into 64 different cachelines and pin each cacheline for use by a different timeline, we will exceed the current maximum permissible vma->pin_count and so time has come to enlarge it. Whilst we are here, try to pull together the similar bits: Address/layout specification: - bias, mappable, zone_4g: address limit specifiers - fixed: address override, limits still apply though - high: not strictly an address limit, but an address direction to search Search controls: - nonblock, nonfault, noevict v2: Rewrite the guideline comment on bit consumption. Signed-off-by: Chris Wilson Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20190128181812.22804-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.h | 22 +++++++------- drivers/gpu/drm/i915/i915_vma.h | 45 +++++++++++++++++++---------- 2 files changed, 40 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index bd679c8c56dd..03ade71b8d9a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -642,19 +642,19 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, /* Flags used by pin/bind&friends. 
*/ #define PIN_NONBLOCK BIT_ULL(0) -#define PIN_MAPPABLE BIT_ULL(1) -#define PIN_ZONE_4G BIT_ULL(2) -#define PIN_NONFAULT BIT_ULL(3) -#define PIN_NOEVICT BIT_ULL(4) +#define PIN_NONFAULT BIT_ULL(1) +#define PIN_NOEVICT BIT_ULL(2) +#define PIN_MAPPABLE BIT_ULL(3) +#define PIN_ZONE_4G BIT_ULL(4) +#define PIN_HIGH BIT_ULL(5) +#define PIN_OFFSET_BIAS BIT_ULL(6) +#define PIN_OFFSET_FIXED BIT_ULL(7) -#define PIN_MBZ BIT_ULL(5) /* I915_VMA_PIN_OVERFLOW */ -#define PIN_GLOBAL BIT_ULL(6) /* I915_VMA_GLOBAL_BIND */ -#define PIN_USER BIT_ULL(7) /* I915_VMA_LOCAL_BIND */ -#define PIN_UPDATE BIT_ULL(8) +#define PIN_MBZ BIT_ULL(8) /* I915_VMA_PIN_OVERFLOW */ +#define PIN_GLOBAL BIT_ULL(9) /* I915_VMA_GLOBAL_BIND */ +#define PIN_USER BIT_ULL(10) /* I915_VMA_LOCAL_BIND */ +#define PIN_UPDATE BIT_ULL(11) -#define PIN_HIGH BIT_ULL(9) -#define PIN_OFFSET_BIAS BIT_ULL(10) -#define PIN_OFFSET_FIXED BIT_ULL(11) #define PIN_OFFSET_MASK (-I915_GTT_PAGE_SIZE) #endif diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 7252abc73d3e..5793abe509a2 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -71,29 +71,42 @@ struct i915_vma { unsigned int open_count; unsigned long flags; /** - * How many users have pinned this object in GTT space. The following - * users can each hold at most one reference: pwrite/pread, execbuffer - * (objects are not allowed multiple times for the same batchbuffer), - * and the framebuffer code. When switching/pageflipping, the - * framebuffer code has at most two buffers pinned per crtc. + * How many users have pinned this object in GTT space. * - * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3 - * bits with absolutely no headroom. So use 4 bits. + * This is a tightly bound, fairly small number of users, so we + * stuff inside the flags field so that we can both check for overflow + * and detect a no-op i915_vma_pin() in a single check, while also + * pinning the vma. + * + * The worst case display setup would have the same vma pinned for + * use on each plane on each crtc, while also building the next atomic + * state and holding a pin for the length of the cleanup queue. In the + * future, the flip queue may be increased from 1. + * Estimated worst case: 3 [qlen] * 4 [max crtcs] * 7 [max planes] = 84 + * + * For GEM, the number of concurrent users for pwrite/pread is + * unbounded. For execbuffer, it is currently one but will in future + * be extended to allow multiple clients to pin vma concurrently. + * + * We also use suballocated pages, with each suballocation claiming + * its own pin on the shared vma. At present, this is limited to + * exclusive cachelines of a single page, so a maximum of 64 possible + * users. 
*/ -#define I915_VMA_PIN_MASK 0xf -#define I915_VMA_PIN_OVERFLOW BIT(5) +#define I915_VMA_PIN_MASK 0xff +#define I915_VMA_PIN_OVERFLOW BIT(8) /** Flags and address space this VMA is bound to */ -#define I915_VMA_GLOBAL_BIND BIT(6) -#define I915_VMA_LOCAL_BIND BIT(7) +#define I915_VMA_GLOBAL_BIND BIT(9) +#define I915_VMA_LOCAL_BIND BIT(10) #define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW) -#define I915_VMA_GGTT BIT(8) -#define I915_VMA_CAN_FENCE BIT(9) -#define I915_VMA_CLOSED BIT(10) -#define I915_VMA_USERFAULT_BIT 11 +#define I915_VMA_GGTT BIT(11) +#define I915_VMA_CAN_FENCE BIT(12) +#define I915_VMA_CLOSED BIT(13) +#define I915_VMA_USERFAULT_BIT 14 #define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT) -#define I915_VMA_GGTT_WRITE BIT(12) +#define I915_VMA_GGTT_WRITE BIT(15) unsigned int active_count; struct rb_root active; From 52954edd1f7030f753a63093c16826ef50805098 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Jan 2019 18:18:09 +0000 Subject: [PATCH 45/89] drm/i915: Allocate a status page for each timeline Allocate a page for use as a status page by a group of timelines; as we only need a dword of storage for each (rounded up to a cacheline for safety), we can pack multiple timelines into the same page. Each timeline will then be able to track its own HW seqno. v2: Reuse the common per-engine HWSP for the solitary ringbuffer timeline, so that we do not have to emit (using per-gen specialised vfuncs) the breadcrumb into the distinct timeline HWSP and instead can keep on using the common MI_STORE_DWORD_INDEX. However, to maintain the sleight-of-hand for the global/per-context seqno switchover, we will store both temporarily (and so use a custom offset for the shared timeline HWSP until the switch over). v3: Keep things simple and allocate a page for each timeline; page sharing comes next. v4: I was caught repeating the same MI_STORE_DWORD_IMM over and over again in selftests. v5: And caught red handed copying create timeline + check.
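For reference, the packing arithmetic alluded to above, as a sketch only: this patch still allocates a whole page per timeline (the sharing lands in a later patch), and the macro/helper names here are invented. Assuming 4KiB pages and the driver's 64-byte CACHELINE_BYTES:

	/* One seqno dword per timeline, padded to a cacheline to keep writers apart. */
	#define HWSP_SLOT_SIZE	CACHELINE_BYTES			/* 64 bytes */
	#define HWSP_SLOTS	(PAGE_SIZE / HWSP_SLOT_SIZE)	/* 4096 / 64 = 64 timelines */

	static u32 hwsp_slot_offset(unsigned int slot)
	{
		GEM_BUG_ON(slot >= HWSP_SLOTS);
		return slot * HWSP_SLOT_SIZE; /* byte offset of this timeline's seqno dword */
	}

Padding each dword out to its own cacheline keeps concurrent writers (GPU and CPU) on distinct lines, which is the "rounded up to a cacheline for safety" above.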
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190128181812.22804-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_timeline.c | 121 ++++++- drivers/gpu/drm/i915/i915_timeline.h | 21 +- drivers/gpu/drm/i915/intel_engine_cs.c | 76 ++-- drivers/gpu/drm/i915/intel_lrc.c | 22 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 10 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 6 +- .../drm/i915/selftests/i915_live_selftests.h | 1 + .../drm/i915/selftests/i915_mock_selftests.h | 2 +- .../gpu/drm/i915/selftests/i915_timeline.c | 326 +++++++++++++++++- drivers/gpu/drm/i915/selftests/mock_engine.c | 14 +- 10 files changed, 543 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c index 84550f17d3df..8d5792311a8f 100644 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -9,28 +9,78 @@ #include "i915_timeline.h" #include "i915_syncmap.h" -void i915_timeline_init(struct drm_i915_private *i915, - struct i915_timeline *timeline, - const char *name) +static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); + + vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + if (IS_ERR(vma)) + i915_gem_object_put(obj); + + return vma; +} + +static int hwsp_alloc(struct i915_timeline *timeline) +{ + struct i915_vma *vma; + + vma = __hwsp_alloc(timeline->i915); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + timeline->hwsp_ggtt = vma; + timeline->hwsp_offset = 0; + + return 0; +} + +int i915_timeline_init(struct drm_i915_private *i915, + struct i915_timeline *timeline, + const char *name, + struct i915_vma *global_hwsp) { struct i915_gt_timelines *gt = &i915->gt.timelines; + void *vaddr; + int err; /* * Ideally we want a set of engines on a single leaf as we expect * to mostly be tracking synchronisation between engines. It is not * a huge issue if this is not the case, but we may want to mitigate * any page crossing penalties if they become an issue. + * + * Called during early_init before we know how many engines there are. 
*/ BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); timeline->i915 = i915; timeline->name = name; + timeline->pin_count = 0; - mutex_lock(>->mutex); - list_add(&timeline->link, >->list); - mutex_unlock(>->mutex); + if (global_hwsp) { + timeline->hwsp_ggtt = i915_vma_get(global_hwsp); + timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR; + } else { + err = hwsp_alloc(timeline); + if (err) + return err; + } - /* Called during early_init before we know how many engines there are */ + vaddr = i915_gem_object_pin_map(timeline->hwsp_ggtt->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + i915_vma_put(timeline->hwsp_ggtt); + return PTR_ERR(vaddr); + } + + timeline->hwsp_seqno = + memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES); timeline->fence_context = dma_fence_context_alloc(1); @@ -40,6 +90,12 @@ void i915_timeline_init(struct drm_i915_private *i915, INIT_LIST_HEAD(&timeline->requests); i915_syncmap_init(&timeline->sync); + + mutex_lock(>->mutex); + list_add(&timeline->link, >->list); + mutex_unlock(>->mutex); + + return 0; } void i915_timelines_init(struct drm_i915_private *i915) @@ -85,6 +141,7 @@ void i915_timeline_fini(struct i915_timeline *timeline) { struct i915_gt_timelines *gt = &timeline->i915->gt.timelines; + GEM_BUG_ON(timeline->pin_count); GEM_BUG_ON(!list_empty(&timeline->requests)); i915_syncmap_free(&timeline->sync); @@ -92,23 +149,69 @@ void i915_timeline_fini(struct i915_timeline *timeline) mutex_lock(>->mutex); list_del(&timeline->link); mutex_unlock(>->mutex); + + i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj); + i915_vma_put(timeline->hwsp_ggtt); } struct i915_timeline * -i915_timeline_create(struct drm_i915_private *i915, const char *name) +i915_timeline_create(struct drm_i915_private *i915, + const char *name, + struct i915_vma *global_hwsp) { struct i915_timeline *timeline; + int err; timeline = kzalloc(sizeof(*timeline), GFP_KERNEL); if (!timeline) return ERR_PTR(-ENOMEM); - i915_timeline_init(i915, timeline, name); + err = i915_timeline_init(i915, timeline, name, global_hwsp); + if (err) { + kfree(timeline); + return ERR_PTR(err); + } + kref_init(&timeline->kref); return timeline; } +int i915_timeline_pin(struct i915_timeline *tl) +{ + int err; + + if (tl->pin_count++) + return 0; + GEM_BUG_ON(!tl->pin_count); + + err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (err) + goto unpin; + + return 0; + +unpin: + tl->pin_count = 0; + return err; +} + +void i915_timeline_unpin(struct i915_timeline *tl) +{ + GEM_BUG_ON(!tl->pin_count); + if (--tl->pin_count) + return; + + /* + * Since this timeline is idle, all bariers upon which we were waiting + * must also be complete and so we can discard the last used barriers + * without loss of information. + */ + i915_syncmap_free(&tl->sync); + + __i915_vma_unpin(tl->hwsp_ggtt); +} + void __i915_timeline_free(struct kref *kref) { struct i915_timeline *timeline = diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index 87ad2dd31c20..0c3739d53d79 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -32,6 +32,8 @@ #include "i915_syncmap.h" #include "i915_utils.h" +struct i915_vma; + struct i915_timeline { u64 fence_context; u32 seqno; @@ -40,6 +42,11 @@ struct i915_timeline { #define TIMELINE_CLIENT 0 /* default subclass */ #define TIMELINE_ENGINE 1 + unsigned int pin_count; + const u32 *hwsp_seqno; + struct i915_vma *hwsp_ggtt; + u32 hwsp_offset; + /** * List of breadcrumbs associated with GPU requests currently * outstanding. 
@@ -71,9 +78,10 @@ struct i915_timeline { struct kref kref; }; -void i915_timeline_init(struct drm_i915_private *i915, - struct i915_timeline *tl, - const char *name); +int i915_timeline_init(struct drm_i915_private *i915, + struct i915_timeline *tl, + const char *name, + struct i915_vma *hwsp); void i915_timeline_fini(struct i915_timeline *tl); static inline void @@ -96,7 +104,9 @@ i915_timeline_set_subclass(struct i915_timeline *timeline, } struct i915_timeline * -i915_timeline_create(struct drm_i915_private *i915, const char *name); +i915_timeline_create(struct drm_i915_private *i915, + const char *name, + struct i915_vma *global_hwsp); static inline struct i915_timeline * i915_timeline_get(struct i915_timeline *timeline) @@ -135,6 +145,9 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl, return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno); } +int i915_timeline_pin(struct i915_timeline *tl); +void i915_timeline_unpin(struct i915_timeline *tl); + void i915_timelines_init(struct drm_i915_private *i915); void i915_timelines_park(struct drm_i915_private *i915); void i915_timelines_fini(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 2657eb6fd914..515e87846afd 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -484,26 +484,6 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine) execlists->queue = RB_ROOT_CACHED; } -/** - * intel_engines_setup_common - setup engine state not requiring hw access - * @engine: Engine to setup. - * - * Initializes @engine@ structure members shared between legacy and execlists - * submission modes which do not require hardware access. - * - * Typically done early in the submission mode specific engine setup stage. - */ -void intel_engine_setup_common(struct intel_engine_cs *engine) -{ - i915_timeline_init(engine->i915, &engine->timeline, engine->name); - i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); - - intel_engine_init_execlist(engine); - intel_engine_init_hangcheck(engine); - intel_engine_init_batch_pool(engine); - intel_engine_init_cmd_parser(engine); -} - static void cleanup_status_page(struct intel_engine_cs *engine) { struct i915_vma *vma; @@ -601,6 +581,44 @@ err: return ret; } +/** + * intel_engines_setup_common - setup engine state not requiring hw access + * @engine: Engine to setup. + * + * Initializes @engine@ structure members shared between legacy and execlists + * submission modes which do not require hardware access. + * + * Typically done early in the submission mode specific engine setup stage. 
+ */ +int intel_engine_setup_common(struct intel_engine_cs *engine) +{ + int err; + + err = init_status_page(engine); + if (err) + return err; + + err = i915_timeline_init(engine->i915, + &engine->timeline, + engine->name, + engine->status_page.vma); + if (err) + goto err_hwsp; + + i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); + + intel_engine_init_execlist(engine); + intel_engine_init_hangcheck(engine); + intel_engine_init_batch_pool(engine); + intel_engine_init_cmd_parser(engine); + + return 0; + +err_hwsp: + cleanup_status_page(engine); + return err; +} + static void __intel_context_unpin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { @@ -617,7 +635,7 @@ struct measure_breadcrumb { static int measure_breadcrumb_dw(struct intel_engine_cs *engine) { struct measure_breadcrumb *frame; - unsigned int dw; + int dw = -ENOMEM; GEM_BUG_ON(!engine->i915->gt.scratch); @@ -625,7 +643,10 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) if (!frame) return -ENOMEM; - i915_timeline_init(engine->i915, &frame->timeline, "measure"); + if (i915_timeline_init(engine->i915, + &frame->timeline, "measure", + engine->status_page.vma)) + goto out_frame; INIT_LIST_HEAD(&frame->ring.request_list); frame->ring.timeline = &frame->timeline; @@ -642,8 +663,9 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) dw = engine->emit_breadcrumb(&frame->rq, frame->cs) - frame->cs; i915_timeline_fini(&frame->timeline); - kfree(frame); +out_frame: + kfree(frame); return dw; } @@ -693,20 +715,14 @@ int intel_engine_init_common(struct intel_engine_cs *engine) if (ret) goto err_unpin_preempt; - ret = init_status_page(engine); - if (ret) - goto err_breadcrumbs; - ret = measure_breadcrumb_dw(engine); if (ret < 0) - goto err_status_page; + goto err_breadcrumbs; engine->emit_breadcrumb_dw = ret; return 0; -err_status_page: - cleanup_status_page(engine); err_breadcrumbs: intel_engine_fini_breadcrumbs(engine); err_unpin_preempt: diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9ae7f77293a0..e388f37743a2 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2206,10 +2206,14 @@ logical_ring_default_irqs(struct intel_engine_cs *engine) engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; } -static void +static int logical_ring_setup(struct intel_engine_cs *engine) { - intel_engine_setup_common(engine); + int err; + + err = intel_engine_setup_common(engine); + if (err) + return err; /* Intentionally left blank. */ engine->buffer = NULL; @@ -2219,6 +2223,8 @@ logical_ring_setup(struct intel_engine_cs *engine) logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine); + + return 0; } static int logical_ring_init(struct intel_engine_cs *engine) @@ -2267,7 +2273,9 @@ int logical_render_ring_init(struct intel_engine_cs *engine) { int ret; - logical_ring_setup(engine); + ret = logical_ring_setup(engine); + if (ret) + return ret; /* Override some for render ring. 
*/ engine->init_context = gen8_init_rcs_context; @@ -2296,7 +2304,11 @@ int logical_render_ring_init(struct intel_engine_cs *engine) int logical_xcs_ring_init(struct intel_engine_cs *engine) { - logical_ring_setup(engine); + int err; + + err = logical_ring_setup(engine); + if (err) + return err; return logical_ring_init(engine); } @@ -2629,7 +2641,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, goto error_deref_obj; } - timeline = i915_timeline_create(ctx->i915, ctx->name); + timeline = i915_timeline_create(ctx->i915, ctx->name, NULL); if (IS_ERR(timeline)) { ret = PTR_ERR(timeline); goto error_deref_obj; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index cb6d2aa2a829..174795622eb1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1545,9 +1545,13 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) struct intel_ring *ring; int err; - intel_engine_setup_common(engine); + err = intel_engine_setup_common(engine); + if (err) + return err; - timeline = i915_timeline_create(engine->i915, engine->name); + timeline = i915_timeline_create(engine->i915, + engine->name, + engine->status_page.vma); if (IS_ERR(timeline)) { err = PTR_ERR(timeline); goto err; @@ -1571,6 +1575,8 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) if (err) goto err_unpin; + GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma); + return 0; err_unpin: diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 32371ae67f24..2927b712b973 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -712,7 +712,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) #define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX * sizeof(u32)) #define I915_GEM_HWS_PREEMPT 0x32 #define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32)) -#define I915_GEM_HWS_SCRATCH 0x40 +#define I915_GEM_HWS_SEQNO 0x40 +#define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32)) +#define I915_GEM_HWS_SCRATCH 0x80 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH * sizeof(u32)) #define I915_HWS_CSB_BUF0_INDEX 0x10 @@ -818,7 +820,7 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno); -void intel_engine_setup_common(struct intel_engine_cs *engine); +int intel_engine_setup_common(struct intel_engine_cs *engine); int intel_engine_init_common(struct intel_engine_cs *engine); void intel_engine_cleanup_common(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index a15713cae3b3..76b4f87fc853 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -13,6 +13,7 @@ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ selftest(uncore, intel_uncore_live_selftests) selftest(workarounds, intel_workarounds_live_selftests) selftest(requests, i915_request_live_selftests) +selftest(timelines, i915_timeline_live_selftests) selftest(objects, i915_gem_object_live_selftests) selftest(dmabuf, i915_gem_dmabuf_live_selftests) selftest(coherency, i915_gem_coherency_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h 
b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 1b70208eeea7..4a83a1c6c406 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -16,7 +16,7 @@ selftest(syncmap, i915_syncmap_mock_selftests) selftest(uncore, intel_uncore_mock_selftests) selftest(engine, intel_engine_cs_mock_selftests) selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) -selftest(timelines, i915_gem_timeline_mock_selftests) +selftest(timelines, i915_timeline_mock_selftests) selftest(requests, i915_request_mock_selftests) selftest(objects, i915_gem_object_mock_selftests) selftest(dmabuf, i915_gem_dmabuf_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c index 19f1c6a5c8fb..1585b614510d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_timeline.c +++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c @@ -7,6 +7,7 @@ #include "../i915_selftest.h" #include "i915_random.h" +#include "igt_flush_test.h" #include "mock_gem_device.h" #include "mock_timeline.h" @@ -256,7 +257,7 @@ static int bench_sync(void *arg) return 0; } -int i915_gem_timeline_mock_selftests(void) +int i915_timeline_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_sync), @@ -265,3 +266,326 @@ int i915_gem_timeline_mock_selftests(void) return i915_subtests(tests, NULL); } + +static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + if (INTEL_GEN(rq->i915) >= 8) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = addr; + *cs++ = 0; + *cs++ = value; + } else if (INTEL_GEN(rq->i915) >= 4) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = 0; + *cs++ = addr; + *cs++ = value; + } else { + *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + *cs++ = addr; + *cs++ = value; + *cs++ = MI_NOOP; + } + + intel_ring_advance(rq, cs); + + return 0; +} + +static u32 hwsp_address(const struct i915_timeline *tl) +{ + return i915_ggtt_offset(tl->hwsp_ggtt) + tl->hwsp_offset; +} + +static struct i915_request * +tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, u32 value) +{ + struct i915_request *rq; + int err; + + lockdep_assert_held(&tl->i915->drm.struct_mutex); /* lazy rq refs */ + + err = i915_timeline_pin(tl); + if (err) { + rq = ERR_PTR(err); + goto out; + } + + rq = i915_request_alloc(engine, engine->i915->kernel_context); + if (IS_ERR(rq)) + goto out_unpin; + + err = emit_ggtt_store_dw(rq, hwsp_address(tl), value); + i915_request_add(rq); + if (err) + rq = ERR_PTR(err); + +out_unpin: + i915_timeline_unpin(tl); +out: + if (IS_ERR(rq)) + pr_err("Failed to write to timeline!\n"); + return rq; +} + +static struct i915_timeline * +checked_i915_timeline_create(struct drm_i915_private *i915) +{ + struct i915_timeline *tl; + + tl = i915_timeline_create(i915, "live", NULL); + if (IS_ERR(tl)) + return tl; + + if (*tl->hwsp_seqno != tl->seqno) { + pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n", + *tl->hwsp_seqno, tl->seqno); + i915_timeline_put(tl); + return ERR_PTR(-EINVAL); + } + + return tl; +} + +static int live_hwsp_engine(void *arg) +{ +#define NUM_TIMELINES 4096 + struct drm_i915_private *i915 = arg; + struct i915_timeline **timelines; + struct intel_engine_cs *engine; + enum intel_engine_id id; + intel_wakeref_t wakeref; + unsigned long count, n; + int err = 0; + + /* + * Create a bunch of timelines and check we can write 
+ * independently to each of their breadcrumb slots. + */ + + timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, + sizeof(*timelines), + GFP_KERNEL); + if (!timelines) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + count = 0; + for_each_engine(engine, i915, id) { + if (!intel_engine_can_store_dword(engine)) + continue; + + for (n = 0; n < NUM_TIMELINES; n++) { + struct i915_timeline *tl; + struct i915_request *rq; + + tl = checked_i915_timeline_create(i915); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto out; + } + + rq = tl_write(tl, engine, count); + if (IS_ERR(rq)) { + i915_timeline_put(tl); + err = PTR_ERR(rq); + goto out; + } + + timelines[count++] = tl; + } + } + +out: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + for (n = 0; n < count; n++) { + struct i915_timeline *tl = timelines[n]; + + if (!err && *tl->hwsp_seqno != n) { + pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", + n, *tl->hwsp_seqno); + err = -EINVAL; + } + i915_timeline_put(tl); + } + + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + + kvfree(timelines); + + return err; +#undef NUM_TIMELINES +} + +static int live_hwsp_alternate(void *arg) +{ +#define NUM_TIMELINES 4096 + struct drm_i915_private *i915 = arg; + struct i915_timeline **timelines; + struct intel_engine_cs *engine; + enum intel_engine_id id; + intel_wakeref_t wakeref; + unsigned long count, n; + int err = 0; + + /* + * Create a bunch of timelines and check we can write + * independently to each of their breadcrumb slots with adjacent + * engines. + */ + + timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, + sizeof(*timelines), + GFP_KERNEL); + if (!timelines) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + count = 0; + for (n = 0; n < NUM_TIMELINES; n++) { + for_each_engine(engine, i915, id) { + struct i915_timeline *tl; + struct i915_request *rq; + + if (!intel_engine_can_store_dword(engine)) + continue; + + tl = checked_i915_timeline_create(i915); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto out; + } + + rq = tl_write(tl, engine, count); + if (IS_ERR(rq)) { + i915_timeline_put(tl); + err = PTR_ERR(rq); + goto out; + } + + timelines[count++] = tl; + } + } + +out: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + for (n = 0; n < count; n++) { + struct i915_timeline *tl = timelines[n]; + + if (!err && *tl->hwsp_seqno != n) { + pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", + n, *tl->hwsp_seqno); + err = -EINVAL; + } + i915_timeline_put(tl); + } + + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + + kvfree(timelines); + + return err; +#undef NUM_TIMELINES +} + +static int live_hwsp_recycle(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + intel_wakeref_t wakeref; + unsigned long count; + int err = 0; + + /* + * Check seqno writes into one timeline at a time. We expect to + * recycle the breadcrumb slot between iterations and neither + * want to confuse ourselves or the GPU. 
+ */ + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + count = 0; + for_each_engine(engine, i915, id) { + IGT_TIMEOUT(end_time); + + if (!intel_engine_can_store_dword(engine)) + continue; + + do { + struct i915_timeline *tl; + struct i915_request *rq; + + tl = checked_i915_timeline_create(i915); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto out; + } + + rq = tl_write(tl, engine, count); + if (IS_ERR(rq)) { + i915_timeline_put(tl); + err = PTR_ERR(rq); + goto out; + } + + if (i915_request_wait(rq, + I915_WAIT_LOCKED, + HZ / 5) < 0) { + pr_err("Wait for timeline writes timed out!\n"); + i915_timeline_put(tl); + err = -EIO; + goto out; + } + + if (*tl->hwsp_seqno != count) { + pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", + count, *tl->hwsp_seqno); + err = -EINVAL; + } + + i915_timeline_put(tl); + count++; + + if (err) + goto out; + + i915_timelines_park(i915); /* Encourage recycling! */ + } while (!__igt_timeout(end_time, NULL)); + } + +out: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} + +int i915_timeline_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_hwsp_recycle), + SUBTEST(live_hwsp_engine), + SUBTEST(live_hwsp_alternate), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 4e5b4dc6df0f..919c89fd6ee5 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -39,7 +39,12 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) if (!ring) return NULL; - i915_timeline_init(engine->i915, &ring->timeline, engine->name); + if (i915_timeline_init(engine->i915, + &ring->timeline, engine->name, + NULL)) { + kfree(ring); + return NULL; + } ring->base.size = sz; ring->base.effective_size = sz; @@ -208,7 +213,11 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.emit_breadcrumb = mock_emit_breadcrumb; engine->base.submit_request = mock_submit_request; - i915_timeline_init(i915, &engine->base.timeline, engine->base.name); + if (i915_timeline_init(i915, + &engine->base.timeline, + engine->base.name, + NULL)) + goto err_free; i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE); intel_engine_init_breadcrumbs(&engine->base); @@ -226,6 +235,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, err_breadcrumbs: intel_engine_fini_breadcrumbs(&engine->base); i915_timeline_fini(&engine->base.timeline); +err_free: kfree(engine); return NULL; } From 8ba306a6a362ef6f3c005ec8819c8890a6fadcd1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Jan 2019 18:18:10 +0000 Subject: [PATCH 46/89] drm/i915: Share per-timeline HWSP using a slab suballocator If we restrict ourselves to only using a cacheline for each timeline's HWSP (we could go smaller, but want to avoid needless polluting cachelines on different engines between different contexts), then we can suballocate a single 4k page into 64 different timeline HWSP. By treating each fresh allocation as a slab of 64 entries, we can keep it around for the next 64 allocation attempts until we need to refresh the slab cache. John Harrison noted the issue of fragmentation leading to the same worst case performance of one page per timeline as before, which can be mitigated by adopting a freelist. 
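The bookkeeping amounts to a 64-bit occupancy mask per page, one bit per
cacheline. A simplified sketch of the claim/release arithmetic (locking,
the freelist and the error paths elided; the helper names are
hypothetical, but the struct and macros match the patch below):

	/*
	 * One bit per CACHELINE_BYTES slot of the page. Caller holds
	 * hwsp_lock and has checked that some bit is still set.
	 */
	static unsigned int claim_cacheline(struct i915_timeline_hwsp *hwsp)
	{
		unsigned int cacheline = __ffs64(hwsp->free_bitmap);

		hwsp->free_bitmap &= ~BIT_ULL(cacheline);
		return cacheline * CACHELINE_BYTES; /* byte offset into the page */
	}

	static void release_cacheline(struct i915_timeline_hwsp *hwsp,
				      unsigned int offset)
	{
		hwsp->free_bitmap |= BIT_ULL(offset / CACHELINE_BYTES);
	}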
v2: Keep all partially allocated HWSP on a freelist This is still without migration, so it is possible for the system to end up with each timeline in its own page, but we ensure that no new allocation would needless allocate a fresh page! v3: Throw a selftest at the allocator to try and catch invalid cacheline reuse. Signed-off-by: Chris Wilson Cc: John Harrison Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190128181812.22804-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 4 + drivers/gpu/drm/i915/i915_timeline.c | 126 ++++++++++++--- drivers/gpu/drm/i915/i915_timeline.h | 1 + drivers/gpu/drm/i915/selftests/i915_random.c | 33 +++- drivers/gpu/drm/i915/selftests/i915_random.h | 3 + .../gpu/drm/i915/selftests/i915_timeline.c | 143 ++++++++++++++++++ 6 files changed, 281 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8a181b455197..6a051381f535 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1978,6 +1978,10 @@ struct drm_i915_private { struct i915_gt_timelines { struct mutex mutex; /* protects list, tainted by GPU */ struct list_head list; + + /* Pack multiple timelines' seqnos into the same page */ + spinlock_t hwsp_lock; + struct list_head hwsp_free_list; } timelines; struct list_head active_rings; diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c index 8d5792311a8f..add8fc33cf6e 100644 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -9,6 +9,18 @@ #include "i915_timeline.h" #include "i915_syncmap.h" +struct i915_timeline_hwsp { + struct i915_vma *vma; + struct list_head free_link; + u64 free_bitmap; +}; + +static inline struct i915_timeline_hwsp * +i915_timeline_hwsp(const struct i915_timeline *tl) +{ + return tl->hwsp_ggtt->private; +} + static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915) { struct drm_i915_gem_object *obj; @@ -27,28 +39,89 @@ static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915) return vma; } -static int hwsp_alloc(struct i915_timeline *timeline) +static struct i915_vma * +hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline) { - struct i915_vma *vma; + struct drm_i915_private *i915 = timeline->i915; + struct i915_gt_timelines *gt = &i915->gt.timelines; + struct i915_timeline_hwsp *hwsp; - vma = __hwsp_alloc(timeline->i915); - if (IS_ERR(vma)) - return PTR_ERR(vma); + BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE); - timeline->hwsp_ggtt = vma; - timeline->hwsp_offset = 0; + spin_lock(>->hwsp_lock); - return 0; + /* hwsp_free_list only contains HWSP that have available cachelines */ + hwsp = list_first_entry_or_null(>->hwsp_free_list, + typeof(*hwsp), free_link); + if (!hwsp) { + struct i915_vma *vma; + + spin_unlock(>->hwsp_lock); + + hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL); + if (!hwsp) + return ERR_PTR(-ENOMEM); + + vma = __hwsp_alloc(i915); + if (IS_ERR(vma)) { + kfree(hwsp); + return vma; + } + + vma->private = hwsp; + hwsp->vma = vma; + hwsp->free_bitmap = ~0ull; + + spin_lock(>->hwsp_lock); + list_add(&hwsp->free_link, >->hwsp_free_list); + } + + GEM_BUG_ON(!hwsp->free_bitmap); + *cacheline = __ffs64(hwsp->free_bitmap); + hwsp->free_bitmap &= ~BIT_ULL(*cacheline); + if (!hwsp->free_bitmap) + list_del(&hwsp->free_link); + + spin_unlock(>->hwsp_lock); + + GEM_BUG_ON(hwsp->vma->private != hwsp); + return hwsp->vma; +} + +static void hwsp_free(struct i915_timeline *timeline) 
+{ + struct i915_gt_timelines *gt = &timeline->i915->gt.timelines; + struct i915_timeline_hwsp *hwsp; + + hwsp = i915_timeline_hwsp(timeline); + if (!hwsp) /* leave global HWSP alone! */ + return; + + spin_lock(>->hwsp_lock); + + /* As a cacheline becomes available, publish the HWSP on the freelist */ + if (!hwsp->free_bitmap) + list_add_tail(&hwsp->free_link, >->hwsp_free_list); + + hwsp->free_bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES); + + /* And if no one is left using it, give the page back to the system */ + if (hwsp->free_bitmap == ~0ull) { + i915_vma_put(hwsp->vma); + list_del(&hwsp->free_link); + kfree(hwsp); + } + + spin_unlock(>->hwsp_lock); } int i915_timeline_init(struct drm_i915_private *i915, struct i915_timeline *timeline, const char *name, - struct i915_vma *global_hwsp) + struct i915_vma *hwsp) { struct i915_gt_timelines *gt = &i915->gt.timelines; void *vaddr; - int err; /* * Ideally we want a set of engines on a single leaf as we expect @@ -64,18 +137,22 @@ int i915_timeline_init(struct drm_i915_private *i915, timeline->name = name; timeline->pin_count = 0; - if (global_hwsp) { - timeline->hwsp_ggtt = i915_vma_get(global_hwsp); - timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR; - } else { - err = hwsp_alloc(timeline); - if (err) - return err; - } + timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR; + if (!hwsp) { + unsigned int cacheline; - vaddr = i915_gem_object_pin_map(timeline->hwsp_ggtt->obj, I915_MAP_WB); + hwsp = hwsp_alloc(timeline, &cacheline); + if (IS_ERR(hwsp)) + return PTR_ERR(hwsp); + + timeline->hwsp_offset = cacheline * CACHELINE_BYTES; + } + timeline->hwsp_ggtt = i915_vma_get(hwsp); + + vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB); if (IS_ERR(vaddr)) { - i915_vma_put(timeline->hwsp_ggtt); + hwsp_free(timeline); + i915_vma_put(hwsp); return PTR_ERR(vaddr); } @@ -105,6 +182,9 @@ void i915_timelines_init(struct drm_i915_private *i915) mutex_init(>->mutex); INIT_LIST_HEAD(>->list); + spin_lock_init(>->hwsp_lock); + INIT_LIST_HEAD(>->hwsp_free_list); + /* via i915_gem_wait_for_idle() */ i915_gem_shrinker_taints_mutex(i915, >->mutex); } @@ -144,12 +224,13 @@ void i915_timeline_fini(struct i915_timeline *timeline) GEM_BUG_ON(timeline->pin_count); GEM_BUG_ON(!list_empty(&timeline->requests)); - i915_syncmap_free(&timeline->sync); - mutex_lock(>->mutex); list_del(&timeline->link); mutex_unlock(>->mutex); + i915_syncmap_free(&timeline->sync); + hwsp_free(timeline); + i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj); i915_vma_put(timeline->hwsp_ggtt); } @@ -226,6 +307,7 @@ void i915_timelines_fini(struct drm_i915_private *i915) struct i915_gt_timelines *gt = &i915->gt.timelines; GEM_BUG_ON(!list_empty(>->list)); + GEM_BUG_ON(!list_empty(>->hwsp_free_list)); mutex_destroy(>->mutex); } diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index 0c3739d53d79..ab736e2e5707 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -33,6 +33,7 @@ #include "i915_utils.h" struct i915_vma; +struct i915_timeline_hwsp; struct i915_timeline { u64 fence_context; diff --git a/drivers/gpu/drm/i915/selftests/i915_random.c b/drivers/gpu/drm/i915/selftests/i915_random.c index 1f415ce47018..716a3f19f030 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.c +++ b/drivers/gpu/drm/i915/selftests/i915_random.c @@ -41,16 +41,35 @@ u64 i915_prandom_u64_state(struct rnd_state *rnd) return x; } +void i915_prandom_shuffle(void *arr, size_t elsz, size_t count, + struct rnd_state *state) +{ + 
char stack[128]; + + if (WARN_ON(elsz > sizeof(stack) || count > U32_MAX)) + return; + + if (!elsz || !count) + return; + + /* Fisher-Yates shuffle courtesy of Knuth */ + while (--count) { + size_t swp; + + swp = i915_prandom_u32_max_state(count + 1, state); + if (swp == count) + continue; + + memcpy(stack, arr + count * elsz, elsz); + memcpy(arr + count * elsz, arr + swp * elsz, elsz); + memcpy(arr + swp * elsz, stack, elsz); + } +} + void i915_random_reorder(unsigned int *order, unsigned int count, struct rnd_state *state) { - unsigned int i, j; - - for (i = 0; i < count; i++) { - BUILD_BUG_ON(sizeof(unsigned int) > sizeof(u32)); - j = i915_prandom_u32_max_state(count, state); - swap(order[i], order[j]); - } + i915_prandom_shuffle(order, sizeof(*order), count, state); } unsigned int *i915_random_order(unsigned int count, struct rnd_state *state) diff --git a/drivers/gpu/drm/i915/selftests/i915_random.h b/drivers/gpu/drm/i915/selftests/i915_random.h index 7dffedc501ca..8e1ff9c105b6 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.h +++ b/drivers/gpu/drm/i915/selftests/i915_random.h @@ -54,4 +54,7 @@ void i915_random_reorder(unsigned int *order, unsigned int count, struct rnd_state *state); +void i915_prandom_shuffle(void *arr, size_t elsz, size_t count, + struct rnd_state *state); + #endif /* !__I915_SELFTESTS_RANDOM_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c index 1585b614510d..c34340f074cf 100644 --- a/drivers/gpu/drm/i915/selftests/i915_timeline.c +++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c @@ -4,6 +4,8 @@ * Copyright © 2017-2018 Intel Corporation */ +#include + #include "../i915_selftest.h" #include "i915_random.h" @@ -11,6 +13,146 @@ #include "mock_gem_device.h" #include "mock_timeline.h" +static struct page *hwsp_page(struct i915_timeline *tl) +{ + struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj; + + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + return sg_page(obj->mm.pages->sgl); +} + +static unsigned long hwsp_cacheline(struct i915_timeline *tl) +{ + unsigned long address = (unsigned long)page_address(hwsp_page(tl)); + + return (address + tl->hwsp_offset) / CACHELINE_BYTES; +} + +#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES) + +struct mock_hwsp_freelist { + struct drm_i915_private *i915; + struct radix_tree_root cachelines; + struct i915_timeline **history; + unsigned long count, max; + struct rnd_state prng; +}; + +enum { + SHUFFLE = BIT(0), +}; + +static void __mock_hwsp_record(struct mock_hwsp_freelist *state, + unsigned int idx, + struct i915_timeline *tl) +{ + tl = xchg(&state->history[idx], tl); + if (tl) { + radix_tree_delete(&state->cachelines, hwsp_cacheline(tl)); + i915_timeline_put(tl); + } +} + +static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, + unsigned int count, + unsigned int flags) +{ + struct i915_timeline *tl; + unsigned int idx; + + while (count--) { + unsigned long cacheline; + int err; + + tl = i915_timeline_create(state->i915, "mock", NULL); + if (IS_ERR(tl)) + return PTR_ERR(tl); + + cacheline = hwsp_cacheline(tl); + err = radix_tree_insert(&state->cachelines, cacheline, tl); + if (err) { + if (err == -EEXIST) { + pr_err("HWSP cacheline %lu already used; duplicate allocation!\n", + cacheline); + } + i915_timeline_put(tl); + return err; + } + + idx = state->count++ % state->max; + __mock_hwsp_record(state, idx, tl); + } + + if (flags & SHUFFLE) + i915_prandom_shuffle(state->history, + sizeof(*state->history), + min(state->count, 
state->max),
+				     &state->prng);
+
+	count = i915_prandom_u32_max_state(min(state->count, state->max),
+					   &state->prng);
+	while (count--) {
+		idx = --state->count % state->max;
+		__mock_hwsp_record(state, idx, NULL);
+	}
+
+	return 0;
+}
+
+static int mock_hwsp_freelist(void *arg)
+{
+	struct mock_hwsp_freelist state;
+	const struct {
+		const char *name;
+		unsigned int flags;
+	} phases[] = {
+		{ "linear", 0 },
+		{ "shuffled", SHUFFLE },
+		{ },
+	}, *p;
+	unsigned int na;
+	int err = 0;
+
+	INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
+	state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);
+
+	state.i915 = mock_gem_device();
+	if (!state.i915)
+		return -ENOMEM;
+
+	/*
+	 * Create a bunch of timelines and check that their HWSP do not overlap.
+	 * Free some, and try again.
+	 */
+
+	state.max = PAGE_SIZE / sizeof(*state.history);
+	state.count = 0;
+	state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
+	if (!state.history) {
+		err = -ENOMEM;
+		goto err_put;
+	}
+
+	mutex_lock(&state.i915->drm.struct_mutex);
+	for (p = phases; p->name; p++) {
+		pr_debug("%s(%s)\n", __func__, p->name);
+		for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
+			err = __mock_hwsp_timeline(&state, na, p->flags);
+			if (err)
+				goto out;
+		}
+	}
+
+out:
+	for (na = 0; na < state.max; na++)
+		__mock_hwsp_record(&state, na, NULL);
+	mutex_unlock(&state.i915->drm.struct_mutex);
+	kfree(state.history);
+err_put:
+	drm_dev_put(&state.i915->drm);
+	return err;
+}
+
 struct __igt_sync {
 	const char *name;
 	u32 seqno;
@@ -260,6 +402,7 @@ static int bench_sync(void *arg)
 int i915_timeline_mock_selftests(void)
 {
 	static const struct i915_subtest tests[] = {
+		SUBTEST(mock_hwsp_freelist),
 		SUBTEST(igt_sync),
 		SUBTEST(bench_sync),
 	};

From 5013eb8cd601c31e6d7d1b9d3291b24e933b77b2 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Mon, 28 Jan 2019 18:18:11 +0000
Subject: [PATCH 47/89] drm/i915: Track the context's seqno in its own
 timeline HWSP

Now that we have allocated ourselves a cacheline to store a breadcrumb,
we can emit a write from the GPU into the timeline's HWSP of the
per-context seqno as we complete each request. This drops the mirroring
of the per-engine HWSP and allows each context to operate
independently. We do not need to unwind the per-context timeline, and
so requests are always consistent with the timeline breadcrumb, greatly
simplifying the completion checks as we no longer need to be concerned
about the global_seqno changing mid-check.

One complication though is that we have to be wary that the request may
outlive the HWSP and so avoid touching the potentially dangling pointer
after we have retired the fence. We also have to guard our access of
the HWSP with RCU; the release of the obj->mm.pages should already be
RCU-safe.

At this point, we are emitting both per-context and global seqno and
still using the single per-engine execution timeline for resolving
interrupts.
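The dangling-pointer guard is cheap: check the fence flags first, and on
retirement repoint the request's seqno pointer at its own fence so that
later queries never chase the stale HWSP. In sketch form (this mirrors
the i915_request_mark_complete()/i915_request_completed() helpers added
below; the shortened names here are for illustration only):

	static inline void mark_complete(struct i915_request *rq)
	{
		/* decouple the completion check from the soon-stale HWSP */
		rq->hwsp_seqno = (u32 *)&rq->fence.seqno;
	}

	static inline bool request_completed(const struct i915_request *rq)
	{
		if (i915_request_signaled(rq)) /* flags first: HWSP may be gone */
			return true;

		return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno);
	}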
v2: s/fake_complete/mark_complete/ Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190128181812.22804-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_request.c | 3 +- drivers/gpu/drm/i915/i915_request.h | 30 +++---- drivers/gpu/drm/i915/i915_reset.c | 1 + drivers/gpu/drm/i915/i915_timeline.c | 4 + drivers/gpu/drm/i915/intel_engine_cs.c | 15 +++- drivers/gpu/drm/i915/intel_lrc.c | 31 ++++--- drivers/gpu/drm/i915/intel_ringbuffer.c | 87 +++++++++++++++---- .../gpu/drm/i915/selftests/i915_timeline.c | 7 +- drivers/gpu/drm/i915/selftests/mock_engine.c | 19 +++- 10 files changed, 139 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 761714448ff3..4e0de22f0166 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2890,7 +2890,7 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) */ spin_lock_irqsave(&engine->timeline.lock, flags); list_for_each_entry(request, &engine->timeline.requests, link) { - if (__i915_request_completed(request, request->global_seqno)) + if (i915_request_completed(request)) continue; active = request; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index a076fd0b7ba6..4d58770e6a8c 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -199,6 +199,7 @@ static void __retire_engine_request(struct intel_engine_cs *engine, spin_unlock(&engine->timeline.lock); spin_lock(&rq->lock); + i915_request_mark_complete(rq); if (!i915_request_signaled(rq)) dma_fence_signal_locked(&rq->fence); if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) @@ -621,7 +622,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) rq->ring = ce->ring; rq->timeline = ce->ring->timeline; GEM_BUG_ON(rq->timeline == &engine->timeline); - rq->hwsp_seqno = &engine->status_page.addr[I915_GEM_HWS_INDEX]; + rq->hwsp_seqno = rq->timeline->hwsp_seqno; spin_lock_init(&rq->lock); dma_fence_init(&rq->fence, diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index ade010fe6e26..96c586d6ff4d 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -289,6 +289,7 @@ long i915_request_wait(struct i915_request *rq, static inline bool i915_request_signaled(const struct i915_request *rq) { + /* The request may live longer than its HWSP, so check flags first! 
*/ return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags); } @@ -340,32 +341,23 @@ static inline u32 hwsp_seqno(const struct i915_request *rq) */ static inline bool i915_request_started(const struct i915_request *rq) { - u32 seqno; + if (i915_request_signaled(rq)) + return true; - seqno = i915_request_global_seqno(rq); - if (!seqno) /* not yet submitted to HW */ - return false; - - return i915_seqno_passed(hwsp_seqno(rq), seqno - 1); -} - -static inline bool -__i915_request_completed(const struct i915_request *rq, u32 seqno) -{ - GEM_BUG_ON(!seqno); - return i915_seqno_passed(hwsp_seqno(rq), seqno) && - seqno == i915_request_global_seqno(rq); + return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1); } static inline bool i915_request_completed(const struct i915_request *rq) { - u32 seqno; + if (i915_request_signaled(rq)) + return true; - seqno = i915_request_global_seqno(rq); - if (!seqno) - return false; + return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno); +} - return __i915_request_completed(rq, seqno); +static inline void i915_request_mark_complete(struct i915_request *rq) +{ + rq->hwsp_seqno = (u32 *)&rq->fence.seqno; /* decouple from HWSP */ } void i915_retire_requests(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index d2dca85a543d..bd82f9b1043f 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -760,6 +760,7 @@ static void nop_submit_request(struct i915_request *request) spin_lock_irqsave(&request->engine->timeline.lock, flags); __i915_request_submit(request); + i915_request_mark_complete(request); intel_engine_write_global_seqno(request->engine, request->global_seqno); spin_unlock_irqrestore(&request->engine->timeline.lock, flags); } diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c index add8fc33cf6e..e4c11414a824 100644 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -270,6 +270,10 @@ int i915_timeline_pin(struct i915_timeline *tl) if (err) goto unpin; + tl->hwsp_offset = + i915_ggtt_offset(tl->hwsp_ggtt) + + offset_in_page(tl->hwsp_offset); + return 0; unpin: diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 515e87846afd..ead9c4371fe1 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -660,10 +660,16 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) frame->rq.ring = &frame->ring; frame->rq.timeline = &frame->timeline; + dw = i915_timeline_pin(&frame->timeline); + if (dw < 0) + goto out_timeline; + dw = engine->emit_breadcrumb(&frame->rq, frame->cs) - frame->cs; - i915_timeline_fini(&frame->timeline); + i915_timeline_unpin(&frame->timeline); +out_timeline: + i915_timeline_fini(&frame->timeline); out_frame: kfree(frame); return dw; @@ -1426,9 +1432,10 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, char hdr[80]; snprintf(hdr, sizeof(hdr), - "\t\tELSP[%d] count=%d, ring->start=%08x, rq: ", + "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x}, rq: ", idx, count, - i915_ggtt_offset(rq->ring->vma)); + i915_ggtt_offset(rq->ring->vma), + rq->timeline->hwsp_offset); print_request(m, rq, hdr); } else { drm_printf(m, "\t\tELSP[%d] idle\n", idx); @@ -1538,6 +1545,8 @@ void intel_engine_dump(struct intel_engine_cs *engine, rq->ring->emit); drm_printf(m, "\t\tring->space: 0x%08x\n", rq->ring->space); + drm_printf(m, "\t\tring->hwsp: 
0x%08x\n", + rq->timeline->hwsp_offset); print_request_ring(m, rq); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e388f37743a2..fdbb3fe8eac9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -832,10 +832,10 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) list_for_each_entry(rq, &engine->timeline.requests, link) { GEM_BUG_ON(!rq->global_seqno); - if (i915_request_signaled(rq)) - continue; + if (!i915_request_signaled(rq)) + dma_fence_set_error(&rq->fence, -EIO); - dma_fence_set_error(&rq->fence, -EIO); + i915_request_mark_complete(rq); } /* Flush the queued requests to the timeline list (for retiring). */ @@ -845,9 +845,9 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) priolist_for_each_request_consume(rq, rn, p, i) { list_del_init(&rq->sched.link); - - dma_fence_set_error(&rq->fence, -EIO); __i915_request_submit(rq); + dma_fence_set_error(&rq->fence, -EIO); + i915_request_mark_complete(rq); } rb_erase_cached(&p->node, &execlists->queue); @@ -2044,10 +2044,17 @@ static u32 *gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); - cs = gen8_emit_ggtt_write(cs, request->global_seqno, + cs = gen8_emit_ggtt_write(cs, + request->fence.seqno, + request->timeline->hwsp_offset); + + cs = gen8_emit_ggtt_write(cs, + request->global_seqno, intel_hws_seqno_address(request->engine)); + *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + request->tail = intel_ring_offset(request, cs); assert_ring_tail_valid(request->ring, request->tail); @@ -2056,18 +2063,20 @@ static u32 *gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) static u32 *gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) { - /* We're using qword write, seqno should be aligned to 8 bytes. 
*/ - BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); - cs = gen8_emit_ggtt_write_rcs(cs, - request->global_seqno, - intel_hws_seqno_address(request->engine), + request->fence.seqno, + request->timeline->hwsp_offset, PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL); + cs = gen8_emit_ggtt_write_rcs(cs, + request->global_seqno, + intel_hws_seqno_address(request->engine), + PIPE_CONTROL_CS_STALL); + *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 174795622eb1..ee3719324e2d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -326,6 +326,11 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL); + *cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = rq->fence.seqno; + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; *cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT; *cs++ = rq->global_seqno; @@ -427,6 +432,13 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL); + *cs++ = rq->timeline->hwsp_offset; + *cs++ = rq->fence.seqno; + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = (PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL); *cs++ = intel_hws_seqno_address(rq->engine); *cs++ = rq->global_seqno; @@ -441,10 +453,19 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW; - *cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT; + GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = rq->fence.seqno; + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = rq->global_seqno; + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -457,14 +478,21 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { int i; - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW; - *cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT; + GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = rq->fence.seqno; + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = rq->global_seqno; for (i = 0; i < GEN7_XCS_WA; i++) { *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_INDEX_ADDR; - *cs++ = rq->global_seqno; + *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = rq->fence.seqno; } *cs++ = MI_FLUSH_DW; @@ -472,7 +500,6 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 
*cs) *cs++ = 0; *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -738,7 +765,7 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled) rq = NULL; spin_lock_irqsave(&tl->lock, flags); list_for_each_entry(pos, &tl->requests, link) { - if (!__i915_request_completed(pos, pos->global_seqno)) { + if (!i915_request_completed(pos)) { rq = pos; break; } @@ -880,10 +907,10 @@ static void cancel_requests(struct intel_engine_cs *engine) list_for_each_entry(request, &engine->timeline.requests, link) { GEM_BUG_ON(!request->global_seqno); - if (i915_request_signaled(request)) - continue; + if (!i915_request_signaled(request)) + dma_fence_set_error(&request->fence, -EIO); - dma_fence_set_error(&request->fence, -EIO); + i915_request_mark_complete(request); } intel_write_status_page(engine, @@ -907,14 +934,20 @@ static void i9xx_submit_request(struct i915_request *request) static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) { + GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + *cs++ = MI_FLUSH; + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = rq->fence.seqno; + *cs++ = MI_STORE_DWORD_INDEX; *cs++ = I915_GEM_HWS_INDEX_ADDR; *cs++ = rq->global_seqno; *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -927,8 +960,15 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) { int i; + GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + *cs++ = MI_FLUSH; + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = rq->fence.seqno; + BUILD_BUG_ON(GEN5_WA_STORES < 1); for (i = 0; i < GEN5_WA_STORES; i++) { *cs++ = MI_STORE_DWORD_INDEX; @@ -937,6 +977,7 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) } *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -1169,6 +1210,10 @@ int intel_ring_pin(struct intel_ring *ring) GEM_BUG_ON(ring->vaddr); + ret = i915_timeline_pin(ring->timeline); + if (ret) + return ret; + flags = PIN_GLOBAL; /* Ring wraparound at offset 0 sometimes hangs. No idea why. 
*/ @@ -1185,28 +1230,32 @@ int intel_ring_pin(struct intel_ring *ring) else ret = i915_gem_object_set_to_cpu_domain(vma->obj, true); if (unlikely(ret)) - return ret; + goto unpin_timeline; } ret = i915_vma_pin(vma, 0, 0, flags); if (unlikely(ret)) - return ret; + goto unpin_timeline; if (i915_vma_is_map_and_fenceable(vma)) addr = (void __force *)i915_vma_pin_iomap(vma); else addr = i915_gem_object_pin_map(vma->obj, map); - if (IS_ERR(addr)) - goto err; + if (IS_ERR(addr)) { + ret = PTR_ERR(addr); + goto unpin_ring; + } vma->obj->pin_global++; ring->vaddr = addr; return 0; -err: +unpin_ring: i915_vma_unpin(vma); - return PTR_ERR(addr); +unpin_timeline: + i915_timeline_unpin(ring->timeline); + return ret; } void intel_ring_reset(struct intel_ring *ring, u32 tail) @@ -1235,6 +1284,8 @@ void intel_ring_unpin(struct intel_ring *ring) ring->vma->obj->pin_global--; i915_vma_unpin(ring->vma); + + i915_timeline_unpin(ring->timeline); } static struct i915_vma * diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c index c34340f074cf..12ea69b1a1e5 100644 --- a/drivers/gpu/drm/i915/selftests/i915_timeline.c +++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c @@ -440,11 +440,6 @@ static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value) return 0; } -static u32 hwsp_address(const struct i915_timeline *tl) -{ - return i915_ggtt_offset(tl->hwsp_ggtt) + tl->hwsp_offset; -} - static struct i915_request * tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, u32 value) { @@ -463,7 +458,7 @@ tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, u32 value) if (IS_ERR(rq)) goto out_unpin; - err = emit_ggtt_store_dw(rq, hwsp_address(tl), value); + err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value); i915_request_add(rq); if (err) rq = ERR_PTR(err); diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 919c89fd6ee5..95e890d7f58b 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -30,6 +30,17 @@ struct mock_ring { struct i915_timeline timeline; }; +static void mock_timeline_pin(struct i915_timeline *tl) +{ + tl->pin_count++; +} + +static void mock_timeline_unpin(struct i915_timeline *tl) +{ + GEM_BUG_ON(!tl->pin_count); + tl->pin_count--; +} + static struct intel_ring *mock_ring(struct intel_engine_cs *engine) { const unsigned long sz = PAGE_SIZE / 2; @@ -76,6 +87,8 @@ static void advance(struct mock_request *request) { list_del_init(&request->link); mock_seqno_advance(request->base.engine, request->base.global_seqno); + i915_request_mark_complete(&request->base); + GEM_BUG_ON(!i915_request_completed(&request->base)); } static void hw_delay_complete(struct timer_list *t) @@ -108,6 +121,7 @@ static void hw_delay_complete(struct timer_list *t) static void mock_context_unpin(struct intel_context *ce) { + mock_timeline_unpin(ce->ring->timeline); i915_gem_context_put(ce->gem_context); } @@ -129,6 +143,7 @@ mock_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { struct intel_context *ce = to_intel_context(ctx, engine); + int err = -ENOMEM; if (ce->pin_count++) return ce; @@ -139,13 +154,15 @@ mock_context_pin(struct intel_engine_cs *engine, goto err; } + mock_timeline_pin(ce->ring->timeline); + ce->ops = &mock_context_ops; i915_gem_context_get(ctx); return ce; err: ce->pin_count = 0; - return ERR_PTR(-ENOMEM); + return ERR_PTR(err); } static int mock_request_alloc(struct 
i915_request *request) From 9407d3bdb048d52fa0fd45fa624a6c9f00072169 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Jan 2019 18:18:12 +0000 Subject: [PATCH 48/89] drm/i915: Track active timelines Now that we pin timelines around use, we have a clearly defined lifetime and convenient points at which we can track only the active timelines. This allows us to reduce the list iteration to only consider those active timelines and not all. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190128181812.22804-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 4 +-- drivers/gpu/drm/i915/i915_reset.c | 2 +- drivers/gpu/drm/i915/i915_timeline.c | 39 ++++++++++++++++++---------- 4 files changed, 29 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6a051381f535..d072f3369ee1 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1977,7 +1977,7 @@ struct drm_i915_private { struct i915_gt_timelines { struct mutex mutex; /* protects list, tainted by GPU */ - struct list_head list; + struct list_head active_list; /* Pack multiple timelines' seqnos into the same page */ spinlock_t hwsp_lock; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4e0de22f0166..9c499edb4c13 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3246,7 +3246,7 @@ wait_for_timelines(struct drm_i915_private *i915, return timeout; mutex_lock(>->mutex); - list_for_each_entry(tl, >->list, link) { + list_for_each_entry(tl, >->active_list, link) { struct i915_request *rq; rq = i915_gem_active_get_unlocked(&tl->last_request); @@ -3274,7 +3274,7 @@ wait_for_timelines(struct drm_i915_private *i915, /* restart after reacquiring the lock */ mutex_lock(>->mutex); - tl = list_entry(>->list, typeof(*tl), link); + tl = list_entry(>->active_list, typeof(*tl), link); } mutex_unlock(>->mutex); diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index bd82f9b1043f..acf3c777e49d 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -856,7 +856,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) * No more can be submitted until we reset the wedged bit. 
*/ mutex_lock(&i915->gt.timelines.mutex); - list_for_each_entry(tl, &i915->gt.timelines.list, link) { + list_for_each_entry(tl, &i915->gt.timelines.active_list, link) { struct i915_request *rq; long timeout; diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c index e4c11414a824..79838d89bdb9 100644 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -120,7 +120,6 @@ int i915_timeline_init(struct drm_i915_private *i915, const char *name, struct i915_vma *hwsp) { - struct i915_gt_timelines *gt = &i915->gt.timelines; void *vaddr; /* @@ -168,10 +167,6 @@ int i915_timeline_init(struct drm_i915_private *i915, i915_syncmap_init(&timeline->sync); - mutex_lock(&gt->mutex); - list_add(&timeline->link, &gt->list); - mutex_unlock(&gt->mutex); - return 0; } @@ -180,7 +175,7 @@ void i915_timelines_init(struct drm_i915_private *i915) struct i915_gt_timelines *gt = &i915->gt.timelines; mutex_init(&gt->mutex); - INIT_LIST_HEAD(&gt->list); + INIT_LIST_HEAD(&gt->active_list); spin_lock_init(&gt->hwsp_lock); INIT_LIST_HEAD(&gt->hwsp_free_list); @@ -189,6 +184,24 @@ void i915_timelines_init(struct drm_i915_private *i915) i915_gem_shrinker_taints_mutex(i915, &gt->mutex); } +static void timeline_add_to_active(struct i915_timeline *tl) +{ + struct i915_gt_timelines *gt = &tl->i915->gt.timelines; + + mutex_lock(&gt->mutex); + list_add(&tl->link, &gt->active_list); + mutex_unlock(&gt->mutex); +} + +static void timeline_remove_from_active(struct i915_timeline *tl) +{ + struct i915_gt_timelines *gt = &tl->i915->gt.timelines; + + mutex_lock(&gt->mutex); + list_del(&tl->link); + mutex_unlock(&gt->mutex); +} + /** * i915_timelines_park - called when the driver idles * @i915: the drm_i915_private device @@ -205,7 +218,7 @@ void i915_timelines_park(struct drm_i915_private *i915) struct i915_timeline *timeline; mutex_lock(&gt->mutex); - list_for_each_entry(timeline, &gt->list, link) { + list_for_each_entry(timeline, &gt->active_list, link) { /* * All known fences are completed so we can scrap * the current sync point tracking and start afresh, @@ -219,15 +232,9 @@ void i915_timelines_park(struct drm_i915_private *i915) void i915_timeline_fini(struct i915_timeline *timeline) { - struct i915_gt_timelines *gt = &timeline->i915->gt.timelines; - GEM_BUG_ON(timeline->pin_count); GEM_BUG_ON(!list_empty(&timeline->requests)); - mutex_lock(&gt->mutex); - list_del(&timeline->link); - mutex_unlock(&gt->mutex); - i915_syncmap_free(&timeline->sync); hwsp_free(timeline); @@ -274,6 +281,8 @@ int i915_timeline_pin(struct i915_timeline *tl) i915_ggtt_offset(tl->hwsp_ggtt) + offset_in_page(tl->hwsp_offset); + timeline_add_to_active(tl); + return 0; unpin: @@ -287,6 +296,8 @@ void i915_timeline_unpin(struct i915_timeline *tl) if (--tl->pin_count) return; + timeline_remove_from_active(tl); + /* * Since this timeline is idle, all barriers upon which we were waiting * must also be complete and so we can discard the last used barriers @@ -310,7 +321,7 @@ void i915_timelines_fini(struct drm_i915_private *i915) { struct i915_gt_timelines *gt = &i915->gt.timelines; - GEM_BUG_ON(!list_empty(&gt->list)); + GEM_BUG_ON(!list_empty(&gt->active_list)); GEM_BUG_ON(!list_empty(&gt->hwsp_free_list)); mutex_destroy(&gt->mutex); From 3d6535cbed4a4b029602ff83efb2adec7cb8d28b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 24 Jan 2019 14:01:14 +0100 Subject: [PATCH 49/89] drm/i915: Enable fastboot by default on Skylake and newer We really want to have fastboot enabled by default to avoid an ugly modeset during boot.
Rather than enabling it everywhere, let's start with enabling it on Skylake and newer. Signed-off-by: Hans de Goede Signed-off-by: Maarten Lankhorst Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20190124130114.3967-1-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/i915_params.c | 6 ++++-- drivers/gpu/drm/i915/i915_params.h | 2 +- drivers/gpu/drm/i915/intel_display.c | 11 ++++++++++- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 9f0539bdaa39..b5be0abbba35 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -97,8 +97,10 @@ i915_param_named_unsafe(disable_power_well, int, 0400, i915_param_named_unsafe(enable_ips, int, 0600, "Enable IPS (default: true)"); -i915_param_named(fastboot, bool, 0600, - "Try to skip unnecessary mode sets at boot time (default: false)"); +i915_param_named(fastboot, int, 0600, + "Try to skip unnecessary mode sets at boot time " + "(0=disabled, 1=enabled) " + "Default: -1 (use per-chip default)"); i915_param_named_unsafe(prefault_disable, bool, 0600, "Disable page prefaulting for pread/pwrite/reloc (default:false). " diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 6efcf330bdab..3f14e9881a0d 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -63,10 +63,10 @@ struct drm_printer; param(int, edp_vswing, 0) \ param(int, reset, 2) \ param(unsigned int, inject_load_failure, 0) \ + param(int, fastboot, -1) \ /* leave bools at the end to not create holes */ \ param(bool, alpha_support, IS_ENABLED(CONFIG_DRM_I915_ALPHA_SUPPORT)) \ param(bool, enable_hangcheck, true) \ - param(bool, fastboot, false) \ param(bool, prefault_disable, false) \ param(bool, load_detect_test, false) \ param(bool, force_reset_modeset_test, false) \ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 6cf480a0eb23..734ac77c9a8e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11719,6 +11719,15 @@ pipe_config_err(bool adjust, const char *name, const char *format, ...) va_end(args); } +static bool fastboot_enabled(struct drm_i915_private *dev_priv) +{ + if (i915_modparams.fastboot != -1) + return i915_modparams.fastboot; + + /* Enable fastboot by default on Skylake and newer */ + return INTEL_GEN(dev_priv) >= 9; +} + static bool intel_pipe_config_compare(struct drm_i915_private *dev_priv, struct intel_crtc_state *current_config, @@ -11730,7 +11739,7 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, (current_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED) && !(pipe_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED); - if (fixup_inherited && !i915_modparams.fastboot) { + if (fixup_inherited && !fastboot_enabled(dev_priv)) { DRM_DEBUG_KMS("initial modeset and fastboot not set\n"); ret = false; } From 968bf969b47df2481022b9a05eaab02948eec088 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 25 Jan 2019 20:38:46 +0200 Subject: [PATCH 50/89] drm/i915: Fix skl srckey mask bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're incorrectly masking off the R/V channel enable bit from KEYMSK. Fix it up.
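As a quick sanity check of the arithmetic, here is a minimal userspace sketch; the enable bit sitting at bit 26 is inferred from the one-bit widening of the mask in the diff below, not taken from the hardware spec:

#include <assert.h>

int main(void)
{
	unsigned int rv_enable = 1u << 26;	/* assumed bit position */

	/* old mask keeps bits 0-25 and so clears the enable bit */
	assert((0x3ffffff & rv_enable) == 0);
	/* new mask keeps bits 0-26 and preserves it */
	assert((0x7ffffff & rv_enable) != 0);
	return 0;
}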
Cc: Maarten Lankhorst Cc: Matt Roper Fixes: b20815255693 ("drm/i915: Add plane alpha blending support, v2.") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190125183846.28755-1-ville.syrjala@linux.intel.com Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/intel_sprite.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index b02d3d9809e3..cd42e81f8a90 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -493,7 +493,7 @@ skl_program_plane(struct intel_plane *plane, keymax = (key->max_value & 0xffffff) | PLANE_KEYMAX_ALPHA(alpha); - keymsk = key->channel_mask & 0x3ffffff; + keymsk = key->channel_mask & 0x7ffffff; if (alpha < 0xff) keymsk |= PLANE_KEYMSK_ALPHA_ENABLE; From 06039d98202f31dbf85af76d2659aaaa455ee0cb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 29 Jan 2019 18:54:49 +0000 Subject: [PATCH 51/89] drm/i915/selftests: Apply a subtest filter In bringup on simulated HW even rudimentary tests are slow, and so many may fail that we want to be able to filter out the noise to focus on the specific problem. Even just the test groups provided for igt are not specific enough, and we would like to isolate one particular subtest (and probably subsubtests!). For simplicity, allow the user to provide a command line parameter such as i915.st_filter=i915_timeline_mock_selftests/igt_sync to restrict ourselves to only running one subtest. The exact name to use is given during a normal run, highlighted as an error if it failed, debug otherwise. The test group is optional, and then all subtests are compared for an exact match with the filter (most subtests have unique names). The filter can be negated, e.g. i915.st_filter=!igt_sync and then all tests but those that match will be run. More than one match can be supplied separated by a comma, e.g. i915.st_filter=igt_vma_create,igt_vma_pin1 to only run those specified, or i915.st_filter=!igt_vma_create,!igt_vma_pin1 to run all but those named. Mixing a blacklist and whitelist will only execute those subtests matching the whitelist so long as they are not previously excluded in the blacklist.
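For a feel of the matching rules, here is a condensed userspace rendition of the semantics described above; the authoritative parser is apply_subtest_filter() in the diff below, and this sketch deliberately omits the optional group/ prefix handling:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* '!' negates a token; the first token equal to 'name' decides the
 * outcome, and any unmatched positive token vetoes the default.
 */
static bool run_subtest(char *filter, const char *name)
{
	bool result = true;
	char *tok;

	while ((tok = strsep(&filter, ","))) {
		bool allow = (*tok != '!');

		if (!allow)
			tok++;
		if (strcmp(tok, name)) {
			if (allow)
				result = false;	/* whitelist present, no match */
			continue;
		}
		return allow;	/* first exact match wins */
	}
	return result;
}

int main(void)
{
	char deny[] = "!igt_sync";
	char pick[] = "igt_vma_create,igt_vma_pin1";

	printf("%d\n", run_subtest(deny, "igt_sync"));		/* 0: negated */
	printf("%d\n", run_subtest(pick, "igt_vma_pin1"));	/* 1: listed */
	return 0;
}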
Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20190129185452.20989-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_selftest.h | 1 + .../gpu/drm/i915/selftests/i915_selftest.c | 47 +++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h index a73472dd12fd..207e21b478f2 100644 --- a/drivers/gpu/drm/i915/i915_selftest.h +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -31,6 +31,7 @@ struct i915_selftest { unsigned long timeout_jiffies; unsigned int timeout_ms; unsigned int random_seed; + char *filter; int mock; int live; }; diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index 86c54ea37f48..10ef0e636a24 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -197,6 +197,49 @@ int i915_live_selftests(struct pci_dev *pdev) return 0; } +static bool apply_subtest_filter(const char *caller, const char *name) +{ + char *filter, *sep, *tok; + bool result = true; + + filter = kstrdup(i915_selftest.filter, GFP_KERNEL); + for (sep = filter; (tok = strsep(&sep, ","));) { + bool allow = true; + char *sl; + + if (*tok == '!') { + allow = false; + tok++; + } + + if (*tok == '\0') + continue; + + sl = strchr(tok, '/'); + if (sl) { + *sl++ = '\0'; + if (strcmp(tok, caller)) { + if (allow) + result = false; + continue; + } + tok = sl; + } + + if (strcmp(tok, name)) { + if (allow) + result = false; + continue; + } + + result = allow; + break; + } + kfree(filter); + + return result; +} + int __i915_subtests(const char *caller, const struct i915_subtest *st, unsigned int count, @@ -209,6 +252,9 @@ int __i915_subtests(const char *caller, if (signal_pending(current)) return -EINTR; + if (!apply_subtest_filter(caller, st->name)) + continue; + pr_debug(DRIVER_NAME ": Running %s/%s\n", caller, st->name); GEM_TRACE("Running %s/%s\n", caller, st->name); @@ -244,6 +290,7 @@ bool __igt_timeout(unsigned long timeout, const char *fmt, ...) module_param_named(st_random_seed, i915_selftest.random_seed, uint, 0400); module_param_named(st_timeout, i915_selftest.timeout_ms, uint, 0400); +module_param_named(st_filter, i915_selftest.filter, charp, 0400); module_param_named_unsafe(mock_selftests, i915_selftest.mock, int, 0400); MODULE_PARM_DESC(mock_selftests, "Run selftests before loading, using mock hardware (0:disabled [default], 1:run tests then load driver, -1:run tests then exit module)"); From 8547444137ec6138ce52fc1938980b737a0d4d9e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 29 Jan 2019 18:54:50 +0000 Subject: [PATCH 52/89] drm/i915: Identify active requests To allow requests to forgo a common execution timeline, one question we need to be able to answer is "is this request running?". To track whether a request has started on HW, we can emit a breadcrumb at the beginning of the request and check its timeline's HWSP to see if the breadcrumb has advanced past the start of this request. (This is in contrast to the global timeline where we need only ask if we are on the global timeline and if the timeline has advanced past the end of the previous request.) There is still confusion from a preempted request, which has already started but relinquished the HW to a high priority request. For the common case, this discrepancy should be negligible. 
However, for identification of hung requests, knowing which one was running at the time of the hang will be much more important. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190129185452.20989-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 15 ++++++++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 12 ++++++ drivers/gpu/drm/i915/i915_request.c | 10 ++--- drivers/gpu/drm/i915/i915_request.h | 1 + drivers/gpu/drm/i915/i915_timeline.c | 1 + drivers/gpu/drm/i915/i915_timeline.h | 2 + drivers/gpu/drm/i915/intel_engine_cs.c | 8 ++-- drivers/gpu/drm/i915/intel_lrc.c | 39 +++++++++++++++++--- drivers/gpu/drm/i915/intel_ringbuffer.c | 25 ++++++++----- drivers/gpu/drm/i915/intel_ringbuffer.h | 6 ++- drivers/gpu/drm/i915/selftests/mock_engine.c | 2 +- 11 files changed, 96 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9c499edb4c13..d92e7ab0005e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2871,6 +2871,14 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, return 0; } +static bool match_ring(struct i915_request *rq) +{ + struct drm_i915_private *dev_priv = rq->i915; + u32 ring = I915_READ(RING_START(rq->engine->mmio_base)); + + return ring == i915_ggtt_offset(rq->ring->vma); +} + struct i915_request * i915_gem_find_active_request(struct intel_engine_cs *engine) { @@ -2893,6 +2901,13 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) if (i915_request_completed(request)) continue; + if (!i915_request_started(request)) + break; + + /* More than one preemptible request may match! */ + if (!match_ring(request)) + break; + active = request; break; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index f250109e1f66..8eedf7cac493 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1976,6 +1976,18 @@ static int eb_submit(struct i915_execbuffer *eb) return err; } + /* + * After we have completed waiting for other engines (using HW semaphores) + * we can signal that this request/batch is ready to run. This + * allows us to determine if the batch is still waiting on the GPU + * or actually running by checking the breadcrumb.
+ */ + if (eb->engine->emit_init_breadcrumb) { + err = eb->engine->emit_init_breadcrumb(eb->request); + if (err) + return err; + } + err = eb->engine->emit_bb_start(eb->request, eb->batch->node.start + eb->batch_start_offset, diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 4d58770e6a8c..7db15b7b3de8 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -333,7 +333,7 @@ void i915_request_retire_upto(struct i915_request *rq) static u32 timeline_get_seqno(struct i915_timeline *tl) { - return ++tl->seqno; + return tl->seqno += 1 + tl->has_initial_breadcrumb; } static void move_to_timeline(struct i915_request *request, @@ -382,8 +382,8 @@ void __i915_request_submit(struct i915_request *request) intel_engine_enable_signaling(request, false); spin_unlock(&request->lock); - engine->emit_breadcrumb(request, - request->ring->vaddr + request->postfix); + engine->emit_fini_breadcrumb(request, + request->ring->vaddr + request->postfix); /* Transfer from per-context onto the global per-engine timeline */ move_to_timeline(request, &engine->timeline); @@ -657,7 +657,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) * around inside i915_request_add() there is sufficient space at * the beginning of the ring as well. */ - rq->reserved_space = 2 * engine->emit_breadcrumb_dw * sizeof(u32); + rq->reserved_space = 2 * engine->emit_fini_breadcrumb_dw * sizeof(u32); /* * Record the position of the start of the request so that @@ -908,7 +908,7 @@ void i915_request_add(struct i915_request *request) * GPU processing the request, we never over-estimate the * position of the ring's HEAD. */ - cs = intel_ring_begin(request, engine->emit_breadcrumb_dw); + cs = intel_ring_begin(request, engine->emit_fini_breadcrumb_dw); GEM_BUG_ON(IS_ERR(cs)); request->postfix = intel_ring_offset(request, cs); diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 96c586d6ff4d..340d6216791c 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -344,6 +344,7 @@ static inline bool i915_request_started(const struct i915_request *rq) if (i915_request_signaled(rq)) return true; + /* Remember: started but may have since been preempted! */ return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1); } diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c index 79838d89bdb9..5ea3af393ffe 100644 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -135,6 +135,7 @@ int i915_timeline_init(struct drm_i915_private *i915, timeline->i915 = i915; timeline->name = name; timeline->pin_count = 0; + timeline->has_initial_breadcrumb = !hwsp; timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR; if (!hwsp) { diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index ab736e2e5707..8caeb66d1cd5 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -48,6 +48,8 @@ struct i915_timeline { struct i915_vma *hwsp_ggtt; u32 hwsp_offset; + bool has_initial_breadcrumb; + /** * List of breadcrumbs associated with GPU requests currently * outstanding. 
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index ead9c4371fe1..8dca76f6315d 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -664,7 +664,7 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) if (dw < 0) goto out_timeline; - dw = engine->emit_breadcrumb(&frame->rq, frame->cs) - frame->cs; + dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs; i915_timeline_unpin(&frame->timeline); @@ -725,7 +725,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine) if (ret < 0) goto err_breadcrumbs; - engine->emit_breadcrumb_dw = ret; + engine->emit_fini_breadcrumb_dw = ret; return 0; @@ -1297,7 +1297,9 @@ static void print_request(struct drm_printer *m, drm_printf(m, "%s%x%s [%llx:%llx]%s @ %dms: %s\n", prefix, rq->global_seqno, - i915_request_completed(rq) ? "!" : "", + i915_request_completed(rq) ? "!" : + i915_request_started(rq) ? "*" : + "", rq->fence.context, rq->fence.seqno, buf, jiffies_to_msecs(jiffies - rq->emitted_jiffies), diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index fdbb3fe8eac9..5db16dd8e844 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -624,7 +624,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * WaIdleLiteRestore:bdw,skl * Apply the wa NOOPs to prevent * ring:HEAD == rq:TAIL as we resubmit the - * request. See gen8_emit_breadcrumb() for + * request. See gen8_emit_fini_breadcrumb() for * where we prepare the padding after the * end of the request. */ @@ -1283,6 +1283,34 @@ execlists_context_pin(struct intel_engine_cs *engine, return __execlists_context_pin(engine, ctx, ce); } +static int gen8_emit_init_breadcrumb(struct i915_request *rq) +{ + u32 *cs; + + GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb); + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* + * Check if we have been preempted before we even get started. + * + * After this point i915_request_started() reports true, even if + * we get preempted and so are no longer running. + */ + *cs++ = MI_ARB_CHECK; + *cs++ = MI_NOOP; + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = rq->timeline->hwsp_offset; + *cs++ = 0; + *cs++ = rq->fence.seqno - 1; + + intel_ring_advance(rq, cs); + return 0; +} + static int emit_pdps(struct i915_request *rq) { const struct intel_engine_cs * const engine = rq->engine; @@ -2039,7 +2067,7 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs) return cs; } -static u32 *gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) +static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) { /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. 
*/ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); @@ -2061,7 +2089,7 @@ static u32 *gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) return gen8_emit_wa_tail(request, cs); } -static u32 *gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) +static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) { cs = gen8_emit_ggtt_write_rcs(cs, request->fence.seqno, @@ -2176,7 +2204,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->request_alloc = execlists_request_alloc; engine->emit_flush = gen8_emit_flush; - engine->emit_breadcrumb = gen8_emit_breadcrumb; + engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; + engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb; engine->set_default_submission = intel_execlists_set_default_submission; @@ -2289,7 +2318,7 @@ int logical_render_ring_init(struct intel_engine_cs *engine) /* Override some for render ring. */ engine->init_context = gen8_init_rcs_context; engine->emit_flush = gen8_emit_flush_render; - engine->emit_breadcrumb = gen8_emit_breadcrumb_rcs; + engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; ret = logical_ring_init(engine); if (ret) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index ee3719324e2d..668ed67336a2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1607,6 +1607,7 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) err = PTR_ERR(timeline); goto err; } + GEM_BUG_ON(timeline->has_initial_breadcrumb); ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE); i915_timeline_put(timeline); @@ -1960,6 +1961,7 @@ static int ring_request_alloc(struct i915_request *request) int ret; GEM_BUG_ON(!request->hw_context->pin_count); + GEM_BUG_ON(request->timeline->has_initial_breadcrumb); /* * Flush enough space to reduce the likelihood of waiting after @@ -2296,9 +2298,14 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->context_pin = intel_ring_context_pin; engine->request_alloc = ring_request_alloc; - engine->emit_breadcrumb = i9xx_emit_breadcrumb; + /* + * Using a global execution timeline; the previous final breadcrumb is + * equivalent to our next initial breadcrumb so we can elide + * engine->emit_init_breadcrumb().
+ */ + engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb; if (IS_GEN(dev_priv, 5)) - engine->emit_breadcrumb = gen5_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen5_emit_breadcrumb; engine->set_default_submission = i9xx_set_default_submission; @@ -2327,11 +2334,11 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) if (INTEL_GEN(dev_priv) >= 7) { engine->init_context = intel_rcs_ctx_init; engine->emit_flush = gen7_render_ring_flush; - engine->emit_breadcrumb = gen7_rcs_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb; } else if (IS_GEN(dev_priv, 6)) { engine->init_context = intel_rcs_ctx_init; engine->emit_flush = gen6_render_ring_flush; - engine->emit_breadcrumb = gen6_rcs_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb; } else if (IS_GEN(dev_priv, 5)) { engine->emit_flush = gen4_render_ring_flush; } else { @@ -2368,9 +2375,9 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; if (IS_GEN(dev_priv, 6)) - engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb; else - engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; } else { engine->emit_flush = bsd_ring_flush; if (IS_GEN(dev_priv, 5)) @@ -2394,9 +2401,9 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; if (IS_GEN(dev_priv, 6)) - engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb; else - engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; return intel_init_ring_buffer(engine); } @@ -2414,7 +2421,7 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) engine->irq_enable = hsw_vebox_irq_enable; engine->irq_disable = hsw_vebox_irq_disable; - engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; return intel_init_ring_buffer(engine); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 2927b712b973..1f30ffb84936 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -463,8 +463,10 @@ struct intel_engine_cs { unsigned int dispatch_flags); #define I915_DISPATCH_SECURE BIT(0) #define I915_DISPATCH_PINNED BIT(1) - u32 *(*emit_breadcrumb)(struct i915_request *rq, u32 *cs); - int emit_breadcrumb_dw; + int (*emit_init_breadcrumb)(struct i915_request *rq); + u32 *(*emit_fini_breadcrumb)(struct i915_request *rq, + u32 *cs); + unsigned int emit_fini_breadcrumb_dw; /* Pass the request to the hardware queue (e.g. directly into * the legacy ringbuffer or to the end of an execlist). 
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 95e890d7f58b..3b226ebc6bc4 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -227,7 +227,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.context_pin = mock_context_pin; engine->base.request_alloc = mock_request_alloc; engine->base.emit_flush = mock_emit_flush; - engine->base.emit_breadcrumb = mock_emit_breadcrumb; + engine->base.emit_fini_breadcrumb = mock_emit_breadcrumb; engine->base.submit_request = mock_submit_request; if (i915_timeline_init(i915, From 4d97cbe01980c0d008d125903ef9ff05b6640c2d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 29 Jan 2019 18:54:51 +0000 Subject: [PATCH 53/89] drm/i915: Rename execlists->queue_priority to queue_priority_hint After noticing that we trigger preemption events for currently executing requests, as well as requests that complete before the preemption and attempting to suppress those preemption events, it is wise to not consider the queue_priority to be authoritative. As we only track the maximum priority seen between dequeue passes, if the maximum priority request is no longer available for dequeuing (it completed or is even executing on another engine), we have no knowledge of the previous queue_priority as it would require us to keep a full history of enqueued requests -- but we already have that history in the priolists! Rename the queue_priority to queue_priority_hint so that we do not confuse it as being exactly the maximum priority in the queue, but merely an indication that we have seen a new maximum priority value and as such we should check whether it should preempt the currently running request. v2: s/preempt_priority_hint/queue_priority_hint/ as preempt implies it being only used for the singular task of preemption and not the wider question of waking up due to a change in the queue. 
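To make the hint's contract concrete, here is a toy single-threaded model; the names, printf reporting, and priority values are invented for the sketch, while the real logic lives in submit_queue() and the dequeue path in the diff below:

#include <limits.h>
#include <stdio.h>

static int queue_priority_hint = INT_MIN;

/* Producer side: the hint only rises between dequeues, and merely
 * decides whether to bother kicking the submission tasklet.
 */
static void submit(int prio)
{
	if (prio > queue_priority_hint) {
		queue_priority_hint = prio;
		printf("kick tasklet (hint %d)\n", prio);
	}
}

/* Consumer side: the hint may be stale (the boosted request may have
 * completed or run elsewhere), so the preempt decision is made against
 * the priority re-read from the priolists, not against the hint.
 */
static void dequeue(int actual_queue_max)
{
	printf("dequeue: hint %d vs actual %d\n",
	       queue_priority_hint, actual_queue_max);
	queue_priority_hint = INT_MIN;	/* reset for the next cycle */
}

int main(void)
{
	submit(0);
	submit(512);	/* higher priority arrives: kick */
	submit(256);	/* below the hint: no kick */
	dequeue(256);	/* the 512 request already ran: hint was stale */
	return 0;
}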
Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190129185452.20989-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_scheduler.c | 11 +++++------ drivers/gpu/drm/i915/intel_engine_cs.c | 4 ++-- drivers/gpu/drm/i915/intel_guc_submission.c | 5 +++-- drivers/gpu/drm/i915/intel_lrc.c | 20 +++++++++++--------- drivers/gpu/drm/i915/intel_ringbuffer.h | 8 ++++++-- 5 files changed, 27 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 340faea6c08a..4eeba588b996 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -127,8 +127,7 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) return rb_entry(rb, struct i915_priolist, node); } -static void assert_priolists(struct intel_engine_execlists * const execlists, - long queue_priority) +static void assert_priolists(struct intel_engine_execlists * const execlists) { struct rb_node *rb; long last_prio, i; @@ -139,7 +138,7 @@ static void assert_priolists(struct intel_engine_execlists * const execlists, GEM_BUG_ON(rb_first_cached(&execlists->queue) != rb_first(&execlists->queue.rb_root)); - last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1; + last_prio = (INT_MAX >> I915_USER_PRIORITY_SHIFT) + 1; for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { const struct i915_priolist *p = to_priolist(rb); @@ -166,7 +165,7 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio) int idx, i; lockdep_assert_held(&engine->timeline.lock); - assert_priolists(execlists, INT_MAX); + assert_priolists(execlists); /* buckets sorted from highest [in slot 0] to lowest priority */ idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1; @@ -353,7 +352,7 @@ static void __i915_schedule(struct i915_request *rq, continue; } - if (prio <= engine->execlists.queue_priority) + if (prio <= engine->execlists.queue_priority_hint) continue; /* @@ -366,7 +365,7 @@ static void __i915_schedule(struct i915_request *rq, continue; /* Defer (tasklet) submission until after all of our updates. */ - engine->execlists.queue_priority = prio; + engine->execlists.queue_priority_hint = prio; tasklet_hi_schedule(&engine->execlists.tasklet); } diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 8dca76f6315d..0a610c9691fd 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -480,7 +480,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine) GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists))); GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS); - execlists->queue_priority = INT_MIN; + execlists->queue_priority_hint = INT_MIN; execlists->queue = RB_ROOT_CACHED; } @@ -1178,7 +1178,7 @@ void intel_engines_park(struct drm_i915_private *i915) } /* Must be reset upon idling, or we may miss the busy wakeup. 
*/ - GEM_BUG_ON(engine->execlists.queue_priority != INT_MIN); + GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN); if (engine->park) engine->park(engine); diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 4295ade0d613..f12ecc8dec6b 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -737,7 +737,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine) if (intel_engine_has_preemption(engine)) { struct guc_preempt_work *preempt_work = &engine->i915->guc.preempt_work[engine->id]; - int prio = execlists->queue_priority; + int prio = execlists->queue_priority_hint; if (__execlists_need_preempt(prio, port_prio(port))) { execlists_set_active(execlists, @@ -783,7 +783,8 @@ static bool __guc_dequeue(struct intel_engine_cs *engine) kmem_cache_free(engine->i915->priorities, p); } done: - execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN; + execlists->queue_priority_hint = + rb ? to_priolist(rb)->priority : INT_MIN; if (submit) port_assign(port, last); if (last) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 5db16dd8e844..a1034f7069aa 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -591,7 +591,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) return; - if (need_preempt(engine, last, execlists->queue_priority)) { + if (need_preempt(engine, last, execlists->queue_priority_hint)) { inject_preempt_context(engine); return; } @@ -699,20 +699,20 @@ done: /* * Here be a bit of magic! Or sleight-of-hand, whichever you prefer. * - * We choose queue_priority such that if we add a request of greater + * We choose the priority hint such that if we add a request of greater * priority than this, we kick the submission tasklet to decide on * the right order of submitting the requests to hardware. We must * also be prepared to reorder requests as they are in-flight on the - * HW. We derive the queue_priority then as the first "hole" in + * HW. We derive the priority hint then as the first "hole" in * the HW submission ports and if there are no available slots, * the priority of the lowest executing request, i.e. last. * * When we do receive a higher priority request ready to run from the - * user, see queue_request(), the queue_priority is bumped to that + * user, see queue_request(), the priority hint is bumped to that * request triggering preemption on the next dequeue (or subsequent * interrupt for secondary ports). */ - execlists->queue_priority = + execlists->queue_priority_hint = port != execlists->port ? 
rq_prio(last) : INT_MIN; if (submit) { @@ -861,7 +861,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) /* Remaining _unready_ requests will be nop'ed when submitted */ - execlists->queue_priority = INT_MIN; + execlists->queue_priority_hint = INT_MIN; execlists->queue = RB_ROOT_CACHED; GEM_BUG_ON(port_isset(execlists->port)); @@ -1092,8 +1092,8 @@ static void __submit_queue_imm(struct intel_engine_cs *engine) static void submit_queue(struct intel_engine_cs *engine, int prio) { - if (prio > engine->execlists.queue_priority) { - engine->execlists.queue_priority = prio; + if (prio > engine->execlists.queue_priority_hint) { + engine->execlists.queue_priority_hint = prio; __submit_queue_imm(engine); } } @@ -2777,7 +2777,9 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, last = NULL; count = 0; - drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority); + if (execlists->queue_priority_hint != INT_MIN) + drm_printf(m, "\t\tQueue priority hint: %d\n", + execlists->queue_priority_hint); for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); int i; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 1f30ffb84936..ef024c154a1b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -293,14 +293,18 @@ struct intel_engine_execlists { unsigned int port_mask; /** - * @queue_priority: Highest pending priority. + * @queue_priority_hint: Highest pending priority. * * When we add requests into the queue, or adjust the priority of * executing requests, we compute the maximum priority of those * pending requests. We can then use this value to determine if * we need to preempt the executing requests to service the queue. + * However, since we may have recorded the priority of an inflight + * request we wanted to preempt but since completed, at the time of + * dequeuing the priority hint may no longer match the highest + * available request priority. */ - int queue_priority; + int queue_priority_hint; /** * @queue: queue of requests, in priority lists From c9a646228816efeeacb05cc9400464ad8aa90017 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 29 Jan 2019 18:54:52 +0000 Subject: [PATCH 54/89] drm/i915/execlists: Suppress preempting self In order to avoid preempting ourselves, we currently refuse to schedule the tasklet if we reschedule an inflight context. However, this glosses over a few issues such as what happens after a CS completion event and we then preempt the newly executing context with itself, or if something else causes a tasklet_schedule triggering the same evaluation to preempt the active context with itself. However, when we avoid preempting ELSP[0], we still retain the preemption value as it may match a second preemption request within the same time period that we need to resolve after the next CS event. However, since we only store the maximum preemption priority seen, it may not match the subsequent event and so we should double check whether or not we actually do need to trigger a preempt-to-idle by comparing the top priorities from each queue. Later, this gives us a hook for finer control over deciding whether the preempt-to-idle is justified. The sequence of events where we end up preempting to no avail is: 1. Queue requests/contexts A, B 2. Priority boost A; no preemption as it is executing, but keep hint 3.
After CS switch, B is less than hint, force preempt-to-idle 4. Resubmit B after idling v2: We can simplify a bunch of tests based on the knowledge that PI will ensure that earlier requests along the same context will have the highest priority. v3: Demonstrate the stale preemption hint with a selftest References: a2bf92e8cc16 ("drm/i915/execlists: Avoid kicking priority on the current context") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190129185452.20989-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_scheduler.c | 20 ++- drivers/gpu/drm/i915/intel_guc_submission.c | 2 + drivers/gpu/drm/i915/intel_lrc.c | 98 ++++++++++++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + drivers/gpu/drm/i915/selftests/intel_lrc.c | 138 ++++++++++++++++++++ 5 files changed, 246 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 4eeba588b996..2d172991024f 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -238,6 +238,18 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked) return engine; } +static bool inflight(const struct i915_request *rq, + const struct intel_engine_cs *engine) +{ + const struct i915_request *active; + + if (!rq->global_seqno) + return false; + + active = port_request(engine->execlists.port); + return active->hw_context == rq->hw_context; +} + static void __i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr) { @@ -327,6 +339,7 @@ static void __i915_schedule(struct i915_request *rq, INIT_LIST_HEAD(&dep->dfs_link); engine = sched_lock_engine(node, engine); + lockdep_assert_held(&engine->timeline.lock); /* Recheck after acquiring the engine->timeline.lock */ if (prio <= node->attr.priority || node_signaled(node)) @@ -355,17 +368,16 @@ static void __i915_schedule(struct i915_request *rq, if (prio <= engine->execlists.queue_priority_hint) continue; + engine->execlists.queue_priority_hint = prio; + /* * If we are already the currently executing context, don't * bother evaluating if we should preempt ourselves. */ - if (node_to_request(node)->global_seqno && - i915_seqno_passed(port_request(engine->execlists.port)->global_seqno, - node_to_request(node)->global_seqno)) + if (inflight(node_to_request(node), engine)) continue; /* Defer (tasklet) submission until after all of our updates. 
*/ - engine->execlists.queue_priority_hint = prio; tasklet_hi_schedule(&engine->execlists.tasklet); } diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index f12ecc8dec6b..8bc8aa54aa35 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -629,6 +629,8 @@ static void inject_preempt_context(struct work_struct *work) EXECLISTS_ACTIVE_PREEMPT); tasklet_schedule(&engine->execlists.tasklet); } + + (void)I915_SELFTEST_ONLY(engine->execlists.preempt_hang.count++); } /* diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index a1034f7069aa..a9eb0211ce77 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -188,13 +188,90 @@ static inline int rq_prio(const struct i915_request *rq) return rq->sched.attr.priority; } -static inline bool need_preempt(const struct intel_engine_cs *engine, - const struct i915_request *last, - int prio) +static int queue_prio(const struct intel_engine_execlists *execlists) { - return (intel_engine_has_preemption(engine) && - __execlists_need_preempt(prio, rq_prio(last)) && - !i915_request_completed(last)); + struct i915_priolist *p; + struct rb_node *rb; + + rb = rb_first_cached(&execlists->queue); + if (!rb) + return INT_MIN; + + /* + * As the priolist[] are inverted, with the highest priority in [0], + * we have to flip the index value to become priority. + */ + p = to_priolist(rb); + return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used); +} + +static inline bool need_preempt(const struct intel_engine_cs *engine, + const struct i915_request *rq) +{ + const int last_prio = rq_prio(rq); + + if (!intel_engine_has_preemption(engine)) + return false; + + if (i915_request_completed(rq)) + return false; + + /* + * Check if the current priority hint merits a preemption attempt. + * + * We record the highest value priority we saw during rescheduling + * prior to this dequeue, therefore we know that if it is strictly + * less than the current tail of ELSP[0], we do not need to force + * a preempt-to-idle cycle. + * + * However, the priority hint is a mere hint that we may need to + * preempt. If that hint is stale or we may be trying to preempt + * ourselves, ignore the request. + */ + if (!__execlists_need_preempt(engine->execlists.queue_priority_hint, + last_prio)) + return false; + + /* + * Check against the first request in ELSP[1], it will, thanks to the + * power of PI, be the highest priority of that context. + */ + if (!list_is_last(&rq->link, &engine->timeline.requests) && + rq_prio(list_next_entry(rq, link)) > last_prio) + return true; + + /* + * If the inflight context did not trigger the preemption, then maybe + * it was the set of queued requests? Pick the highest priority in + * the queue (the first active priolist) and see if it deserves to be + * running instead of ELSP[0]. + * + * The highest priority request in the queue can not be either + * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same + * context, its priority would not exceed ELSP[0] aka last_prio. + */ + return queue_prio(&engine->execlists) > last_prio; +} + +__maybe_unused static inline bool +assert_priority_queue(const struct intel_engine_execlists *execlists, + const struct i915_request *prev, + const struct i915_request *next) +{ + if (!prev) + return true; + + /* + * Without preemption, the prev may refer to the still active element + * which we refuse to let go.
+ * + * Even with preemption, there are times when we think it is better not + * to preempt and leave an ostensibly lower priority request in flight. + */ + if (port_request(execlists->port) == prev) + return true; + + return rq_prio(prev) >= rq_prio(next); } /* @@ -523,6 +600,8 @@ static void inject_preempt_context(struct intel_engine_cs *engine) execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT); + + (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); } static void complete_preempt_context(struct intel_engine_execlists *execlists) @@ -591,7 +670,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) return; - if (need_preempt(engine, last, execlists->queue_priority_hint)) { + if (need_preempt(engine, last)) { inject_preempt_context(engine); return; } @@ -637,8 +716,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) int i; priolist_for_each_request_consume(rq, rn, p, i) { - GEM_BUG_ON(last && - need_preempt(engine, last, rq_prio(rq))); + GEM_BUG_ON(!assert_priority_queue(execlists, last, rq)); /* * Can we combine this request with the current port? @@ -884,6 +962,8 @@ static void process_csb(struct intel_engine_cs *engine) const u32 * const buf = execlists->csb_status; u8 head, tail; + lockdep_assert_held(&engine->timeline.lock); + /* * Note that csb_write, csb_status may be either in HWSP or mmio. * When reading from the csb_write mmio register, we have to be diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index ef024c154a1b..953ccc2617ff 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -203,6 +203,7 @@ struct i915_priolist { struct st_preempt_hang { struct completion completion; + unsigned int count; bool inject_hang; }; diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c index 2b2ecd76c2ac..fb35f53c9ce3 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -268,6 +268,143 @@ err_wedged: goto err_ctx_lo; } +struct preempt_client { + struct igt_spinner spin; + struct i915_gem_context *ctx; +}; + +static int preempt_client_init(struct drm_i915_private *i915, + struct preempt_client *c) +{ + c->ctx = kernel_context(i915); + if (!c->ctx) + return -ENOMEM; + + if (igt_spinner_init(&c->spin, i915)) + goto err_ctx; + + return 0; + +err_ctx: + kernel_context_close(c->ctx); + return -ENOMEM; +} + +static void preempt_client_fini(struct preempt_client *c) +{ + igt_spinner_fini(&c->spin); + kernel_context_close(c->ctx); +} + +static int live_suppress_self_preempt(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX) + }; + struct preempt_client a, b; + enum intel_engine_id id; + intel_wakeref_t wakeref; + int err = -ENOMEM; + + /* + * Verify that if a preemption request does not cause a change in + * the current execution order, the preempt-to-idle injection is + * skipped and that we do not accidentally apply it after the CS + * completion event. 
+ */ + + if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + return 0; + + if (USES_GUC_SUBMISSION(i915)) + return 0; /* presume black box */ + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + if (preempt_client_init(i915, &a)) + goto err_unlock; + if (preempt_client_init(i915, &b)) + goto err_client_a; + + for_each_engine(engine, i915, id) { + struct i915_request *rq_a, *rq_b; + int depth; + + engine->execlists.preempt_hang.count = 0; + + rq_a = igt_spinner_create_request(&a.spin, + a.ctx, engine, + MI_NOOP); + if (IS_ERR(rq_a)) { + err = PTR_ERR(rq_a); + goto err_client_b; + } + + i915_request_add(rq_a); + if (!igt_wait_for_spinner(&a.spin, rq_a)) { + pr_err("First client failed to start\n"); + goto err_wedged; + } + + for (depth = 0; depth < 8; depth++) { + rq_b = igt_spinner_create_request(&b.spin, + b.ctx, engine, + MI_NOOP); + if (IS_ERR(rq_b)) { + err = PTR_ERR(rq_b); + goto err_client_b; + } + i915_request_add(rq_b); + + GEM_BUG_ON(i915_request_completed(rq_a)); + engine->schedule(rq_a, &attr); + igt_spinner_end(&a.spin); + + if (!igt_wait_for_spinner(&b.spin, rq_b)) { + pr_err("Second client failed to start\n"); + goto err_wedged; + } + + swap(a, b); + rq_a = rq_b; + } + igt_spinner_end(&a.spin); + + if (engine->execlists.preempt_hang.count) { + pr_err("Preemption recorded x%d, depth %d; should have been suppressed!\n", + engine->execlists.preempt_hang.count, + depth); + err = -EINVAL; + goto err_client_b; + } + + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + goto err_wedged; + } + + err = 0; +err_client_b: + preempt_client_fini(&b); +err_client_a: + preempt_client_fini(&a); +err_unlock: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + return err; + +err_wedged: + igt_spinner_end(&b.spin); + igt_spinner_end(&a.spin); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_client_b; +} + static int live_preempt_hang(void *arg) { struct drm_i915_private *i915 = arg; @@ -647,6 +784,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_sanitycheck), SUBTEST(live_preempt), SUBTEST(live_late_preempt), + SUBTEST(live_suppress_self_preempt), SUBTEST(live_preempt_hang), SUBTEST(live_preempt_smoke), }; From 5a3db6f08a8eae15bff597dce290ac238daeb717 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 29 Jan 2019 19:06:09 +0200 Subject: [PATCH 55/89] drm: Constify drm_color_lut_check() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_color_lut_check() doesn't modify the passed in blob so let's make it const. Also s/uint32_t/u32/ while at it. v2: Reduce line wraps (Sam) Cc: Matt Roper Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190129170609.5718-1-ville.syrjala@linux.intel.com Reviewed-by: Sam Ravnborg Acked-by: Daniel Vetter --- drivers/gpu/drm/drm_color_mgmt.c | 5 ++--- include/drm/drm_color_mgmt.h | 3 +-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index 968ca7c91ad8..d5d34d0c79c7 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -474,10 +474,9 @@ EXPORT_SYMBOL(drm_plane_create_color_properties); * * Returns 0 on success, -EINVAL on failure.
*/ -int drm_color_lut_check(struct drm_property_blob *lut, - uint32_t tests) +int drm_color_lut_check(const struct drm_property_blob *lut, u32 tests) { - struct drm_color_lut *entry; + const struct drm_color_lut *entry; int i; if (!lut || !tests) diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index 6affbda6d9cb..d1c662d92ab7 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -96,6 +96,5 @@ enum drm_color_lut_tests { DRM_COLOR_LUT_NON_DECREASING = BIT(1), }; -int drm_color_lut_check(struct drm_property_blob *lut, - uint32_t tests); +int drm_color_lut_check(const struct drm_property_blob *lut, u32 tests); #endif From addc80f0bba9f015208132b804397d9d08399465 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 29 Jan 2019 16:19:12 +0200 Subject: [PATCH 56/89] drm/i915/tv: Fix adjusted_mode dotclock for interlaced modes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_tv_mode_to_mode() assumes the pipe will be in progressive fetch mode, and thus when programming the pipe into interlaced mode we have to halve the calculated dotclock to get the correct field duration. This becomes more important when we start to program the pipe into interlaced mode on i965gm as we depend on the timestamps to get accurate frame counter values. Without halving the clock our guesstimated frame counter would tick at twice the expected speed. Cc: Imre Deak Fixes: 690157f0a9e7 ("drm/i915/tv: Fix >1024 modes on gen3") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190129141913.5515-1-ville.syrjala@linux.intel.com Acked-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_tv.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 751b88dde18e..3a5682f44e55 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1150,6 +1150,8 @@ intel_tv_get_config(struct intel_encoder *encoder, ypos, mode.vdisplay - ysize - ypos); adjusted_mode->crtc_clock = mode.clock; + if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) + adjusted_mode->crtc_clock /= 2; /* pixel counter doesn't work on i965gm TV output */ if (IS_I965GM(dev_priv)) @@ -1214,8 +1216,11 @@ intel_tv_compute_config(struct intel_encoder *encoder, tv_conn_state->bypass_vfilter = true; - if (!tv_mode->progressive) + if (!tv_mode->progressive) { + adjusted_mode->clock /= 2; + adjusted_mode->crtc_clock /= 2; adjusted_mode->flags |= DRM_MODE_FLAG_INTERLACE; + } } else { tv_conn_state->margins.top = conn_state->tv.margins.top; tv_conn_state->margins.bottom = conn_state->tv.margins.bottom; From 68e94f62cfbe1650fb783979150a2b2f0a3eca1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 29 Jan 2019 16:19:13 +0200 Subject: [PATCH 57/89] drm/i915/tv: Bypass the vertical filter if possible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's switch the pipe into interlaced mode and switch off the TV encoder vertical filter if the pipe vdisplay matches the TV YSIZE exactly. While I didn't measure it I presume this might reduce the power consumption a little bit, and the pixel rate is halved as the pipe will now be fetching in interlaced mode rather than in progressive mode (effectively the same difference as between IF-ID vs. PF-ID pfit modes on more modern hardware) so a bit easier on the memory bandwidth.
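The bypass condition itself is small enough to restate as a standalone sketch; it mirrors intel_tv_vert_scaling() from the diff below, and the NTSC-ish numbers are illustrative only:

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	int tv_ysize = 480, margin_top = 18, margin_bottom = 18;
	int vdisplay = 444;	/* mode lines requested by userspace */
	bool needs_scaling =
		(tv_ysize - margin_top - margin_bottom) != vdisplay;

	/* only a bit-exact match lets us drop the filter and fetch
	 * interlaced, halving the dotclock and memory traffic */
	printf("vertical filter %s\n",
	       needs_scaling ? "required" : "bypassed");
	return 0;
}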
Cc: Imre Deak Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190129141913.5515-2-ville.syrjala@linux.intel.com Acked-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_tv.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 3a5682f44e55..3924c4944e1f 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1159,6 +1159,22 @@ intel_tv_get_config(struct intel_encoder *encoder, I915_MODE_FLAG_USE_SCANLINE_COUNTER; } +static bool intel_tv_source_too_wide(struct drm_i915_private *dev_priv, + int hdisplay) +{ + return IS_GEN(dev_priv, 3) && hdisplay > 1024; +} + +static bool intel_tv_vert_scaling(const struct drm_display_mode *tv_mode, + const struct drm_connector_state *conn_state, + int vdisplay) +{ + return tv_mode->crtc_vdisplay - + conn_state->tv.margins.top - + conn_state->tv.margins.bottom != + vdisplay; +} + static int intel_tv_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, @@ -1189,7 +1205,8 @@ intel_tv_compute_config(struct intel_encoder *encoder, intel_tv_mode_to_mode(adjusted_mode, tv_mode); drm_mode_set_crtcinfo(adjusted_mode, 0); - if (IS_GEN(dev_priv, 3) && hdisplay > 1024) { + if (intel_tv_source_too_wide(dev_priv, hdisplay) || + !intel_tv_vert_scaling(adjusted_mode, conn_state, vdisplay)) { int extra, top, bottom; extra = adjusted_mode->crtc_vdisplay - vdisplay; From 3df0bd19193c47c48b7904e5f41c3041a431da33 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 29 Jan 2019 20:52:28 +0000 Subject: [PATCH 58/89] drm/i915: Remove the intel_engine_notify tracepoint The global seqno is defunct and so we have no meaningful indicator of forward progress for an engine. You need to listen to the request signaling tracepoints instead. 
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190129205230.19056-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_irq.c | 2 -- drivers/gpu/drm/i915/i915_trace.h | 25 ------------------------- 2 files changed, 27 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 26ab00862b59..0fcdb14c50f4 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1227,8 +1227,6 @@ static void notify_ring(struct intel_engine_cs *engine) wake_up_process(tsk); rcu_read_unlock(); - - trace_intel_engine_notify(engine, wait); } static void vlv_c0_read(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 33d90eca9cdd..cb5bc65d575d 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -750,31 +750,6 @@ trace_i915_request_out(struct i915_request *rq) #endif #endif -TRACE_EVENT(intel_engine_notify, - TP_PROTO(struct intel_engine_cs *engine, bool waiters), - TP_ARGS(engine, waiters), - - TP_STRUCT__entry( - __field(u32, dev) - __field(u16, class) - __field(u16, instance) - __field(u32, seqno) - __field(bool, waiters) - ), - - TP_fast_assign( - __entry->dev = engine->i915->drm.primary->index; - __entry->class = engine->uabi_class; - __entry->instance = engine->instance; - __entry->seqno = intel_engine_get_seqno(engine); - __entry->waiters = waiters; - ), - - TP_printk("dev=%u, engine=%u:%u, seqno=%u, waiters=%u", - __entry->dev, __entry->class, __entry->instance, - __entry->seqno, __entry->waiters) -); - DEFINE_EVENT(i915_request, i915_request_retire, TP_PROTO(struct i915_request *rq), TP_ARGS(rq) From 52c0fdb25c7c919334b97976d05096b441a3eada Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 29 Jan 2019 20:52:29 +0000 Subject: [PATCH 59/89] drm/i915: Replace global breadcrumbs with per-context interrupt tracking A few years ago, see commit 688e6c725816 ("drm/i915: Slaughter the thundering i915_wait_request herd"), the issue of handling multiple clients waiting in parallel was brought to our attention. The requirement was that every client should be woken immediately upon its request being signaled, without incurring any cpu overhead. Handling certain fragility of our hw meant that we could not do a simple check inside the irq handler (some generations required almost unbounded delays before we could be sure of seqno coherency) and so request completion checking required delegation. Before commit 688e6c725816, the solution was simple. Every client waiting on a request would be woken on every interrupt and each would do a heavyweight check to see if their request was complete. Commit 688e6c725816 introduced an rbtree so that only the earliest waiter on the global timeline would be woken, and would wake the next and so on. (Along with various complications to handle requests being reordered along the global timeline, and also a requirement for kthread to provide a delegate for fence signaling that had no process context.) The global rbtree depends on knowing the execution timeline (and global seqno). Without knowing that order, we must instead check all contexts queued to the HW to see which may have advanced. We trim that list by only checking queued contexts that are being waited on, but still we keep a list of all active contexts and their active signalers that we inspect from inside the irq handler.
By moving the waiters onto the fence signal list, we can combine the
client wakeup with the dma_fence signaling (a dramatic reduction in
complexity, but does require the HW to be coherent: the seqno must be
visible from the cpu before the interrupt is raised - we keep a timer
backup just in case).

Having previously fixed all the issues with irq-seqno serialisation (by
inserting delays onto the GPU after each request instead of random
delays on the CPU after each interrupt), we can rely on the seqno state
to perform direct wakeups from the interrupt handler. This allows us to
preserve our single context switch behaviour of the current routine,
with the only downside that we lose the RT priority sorting of wakeups.
In general, direct wakeup latency of multiple clients is about the same
(about 10% better in most cases) with a reduction in total CPU time
spent in the waiter (about 20-50% depending on gen). Average herd
behaviour is improved, but at the cost of not delegating wakeups on
task_prio.

v2: Capture fence signaling state for error state and add comments to
warm even the coldest of hearts.
v3: Check if the request is still active before busywaiting.
v4: Reduce the amount of pointer misdirection with list_for_each_safe
and using a local i915_request variable inside the loops.
v5: Add a missing pluralisation to a purely informative selftest
message.

References: 688e6c725816 ("drm/i915: Slaughter the thundering i915_wait_request herd")
Signed-off-by: Chris Wilson
Cc: Tvrtko Ursulin
Reviewed-by: Tvrtko Ursulin
Link: https://patchwork.freedesktop.org/patch/msgid/20190129205230.19056-2-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_debugfs.c           |  28 +-
 drivers/gpu/drm/i915/i915_gem_context.c       |   3 +
 drivers/gpu/drm/i915/i915_gem_context.h       |   2 +
 drivers/gpu/drm/i915/i915_gpu_error.c         |  83 +-
 drivers/gpu/drm/i915/i915_gpu_error.h         |   9 +-
 drivers/gpu/drm/i915/i915_irq.c               |  82 +-
 drivers/gpu/drm/i915/i915_request.c           | 152 ++--
 drivers/gpu/drm/i915/i915_request.h           |  72 +-
 drivers/gpu/drm/i915/i915_reset.c             |  16 +-
 drivers/gpu/drm/i915/i915_scheduler.c         |   2 +-
 drivers/gpu/drm/i915/intel_breadcrumbs.c      | 844 +++++-------------
 drivers/gpu/drm/i915/intel_engine_cs.c        |  35 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c       |   2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h       |  96 +-
 .../drm/i915/selftests/i915_mock_selftests.h  |   1 -
 drivers/gpu/drm/i915/selftests/i915_request.c | 425 +++++++++
 drivers/gpu/drm/i915/selftests/igt_spinner.c  |   5 -
 .../drm/i915/selftests/intel_breadcrumbs.c    | 470 ----------
 .../gpu/drm/i915/selftests/intel_hangcheck.c  |   2 +-
 drivers/gpu/drm/i915/selftests/lib_sw_fence.c |  54 ++
 drivers/gpu/drm/i915/selftests/lib_sw_fence.h |   3 +
 drivers/gpu/drm/i915/selftests/mock_engine.c  |  17 +-
 drivers/gpu/drm/i915/selftests/mock_engine.h  |   6 -
 23 files changed, 909 insertions(+), 1500 deletions(-)
 delete mode 100644 drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index c9c230499420..29d52304c189 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1315,29 +1315,16 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 	seq_printf(m, "GT active?
%s\n", yesno(dev_priv->gt.awake)); for_each_engine(engine, dev_priv, id) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct rb_node *rb; - seq_printf(m, "%s:\n", engine->name); seq_printf(m, "\tseqno = %x [current %x, last %x], %dms ago\n", engine->hangcheck.seqno, seqno[id], intel_engine_last_submit(engine), jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); - seq_printf(m, "\twaiters? %s, fake irq active? %s\n", - yesno(intel_engine_has_waiter(engine)), + seq_printf(m, "\tfake irq active? %s\n", yesno(test_bit(engine->id, &dev_priv->gpu_error.missed_irq_rings))); - spin_lock_irq(&b->rb_lock); - for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = rb_entry(rb, typeof(*w), node); - - seq_printf(m, "\t%s [%d] waiting for %x\n", - w->tsk->comm, w->tsk->pid, w->seqno); - } - spin_unlock_irq(&b->rb_lock); - seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n", (long long)engine->hangcheck.acthd, (long long)acthd[id]); @@ -2021,18 +2008,6 @@ static int i915_swizzle_info(struct seq_file *m, void *data) return 0; } -static int count_irq_waiters(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - int count = 0; - - for_each_engine(engine, i915, id) - count += intel_engine_has_waiter(engine); - - return count; -} - static const char *rps_power_to_str(unsigned int power) { static const char * const strings[] = { @@ -2072,7 +2047,6 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) seq_printf(m, "RPS enabled? %d\n", rps->enabled); seq_printf(m, "GPU busy? %s [%d requests]\n", yesno(dev_priv->gt.awake), dev_priv->gt.active_requests); - seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv)); seq_printf(m, "Boosts outstanding? %d\n", atomic_read(&rps->num_waiters)); seq_printf(m, "Interactive? 
%d\n", READ_ONCE(rps->power.interactive)); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 93e84751370f..6faf1f6faab5 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -327,6 +327,9 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) { ce->gem_context = ctx; + + INIT_LIST_HEAD(&ce->signal_link); + INIT_LIST_HEAD(&ce->signals); } static struct i915_gem_context * diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 3769438228f6..6ba40ff6b91f 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -164,6 +164,8 @@ struct i915_gem_context { struct intel_context { struct i915_gem_context *gem_context; struct intel_engine_cs *active; + struct list_head signal_link; + struct list_head signals; struct i915_vma *state; struct intel_ring *ring; u32 *lrc_reg_state; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 898e06014295..304a7ef7f7fb 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -447,9 +447,14 @@ static void error_print_request(struct drm_i915_error_state_buf *m, if (!erq->seqno) return; - err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n", + err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x%s%s, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n", prefix, erq->pid, erq->ban_score, - erq->context, erq->seqno, erq->sched_attr.priority, + erq->context, erq->seqno, + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &erq->flags) ? "!" : "", + test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &erq->flags) ? "+" : "", + erq->sched_attr.priority, jiffies_to_msecs(erq->jiffies - epoch), erq->start, erq->head, erq->tail); } @@ -530,7 +535,6 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, } err_printf(m, " seqno: 0x%08x\n", ee->seqno); err_printf(m, " last_seqno: 0x%08x\n", ee->last_seqno); - err_printf(m, " waiting: %s\n", yesno(ee->waiting)); err_printf(m, " ring->head: 0x%08x\n", ee->cpu_ring_head); err_printf(m, " ring->tail: 0x%08x\n", ee->cpu_ring_tail); err_printf(m, " hangcheck timestamp: %dms (%lu%s)\n", @@ -804,21 +808,6 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, error->epoch); } - if (IS_ERR(ee->waiters)) { - err_printf(m, "%s --- ? 
waiters [unable to acquire spinlock]\n", - m->i915->engine[i]->name); - } else if (ee->num_waiters) { - err_printf(m, "%s --- %d waiters\n", - m->i915->engine[i]->name, - ee->num_waiters); - for (j = 0; j < ee->num_waiters; j++) { - err_printf(m, " seqno 0x%08x for %s [%d]\n", - ee->waiters[j].seqno, - ee->waiters[j].comm, - ee->waiters[j].pid); - } - } - print_error_obj(m, m->i915->engine[i], "ringbuffer", ee->ringbuffer); @@ -1000,8 +989,6 @@ void __i915_gpu_state_free(struct kref *error_ref) i915_error_object_free(ee->wa_ctx); kfree(ee->requests); - if (!IS_ERR_OR_NULL(ee->waiters)) - kfree(ee->waiters); } for (i = 0; i < ARRAY_SIZE(error->active_bo); i++) @@ -1205,59 +1192,6 @@ static void gen6_record_semaphore_state(struct intel_engine_cs *engine, I915_READ(RING_SYNC_2(engine->mmio_base)); } -static void error_record_engine_waiters(struct intel_engine_cs *engine, - struct drm_i915_error_engine *ee) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct drm_i915_error_waiter *waiter; - struct rb_node *rb; - int count; - - ee->num_waiters = 0; - ee->waiters = NULL; - - if (RB_EMPTY_ROOT(&b->waiters)) - return; - - if (!spin_trylock_irq(&b->rb_lock)) { - ee->waiters = ERR_PTR(-EDEADLK); - return; - } - - count = 0; - for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb)) - count++; - spin_unlock_irq(&b->rb_lock); - - waiter = NULL; - if (count) - waiter = kmalloc_array(count, - sizeof(struct drm_i915_error_waiter), - GFP_ATOMIC); - if (!waiter) - return; - - if (!spin_trylock_irq(&b->rb_lock)) { - kfree(waiter); - ee->waiters = ERR_PTR(-EDEADLK); - return; - } - - ee->waiters = waiter; - for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = rb_entry(rb, typeof(*w), node); - - strcpy(waiter->comm, w->tsk->comm); - waiter->pid = w->tsk->pid; - waiter->seqno = w->seqno; - waiter++; - - if (++ee->num_waiters == count) - break; - } - spin_unlock_irq(&b->rb_lock); -} - static void error_record_engine_registers(struct i915_gpu_state *error, struct intel_engine_cs *engine, struct drm_i915_error_engine *ee) @@ -1293,7 +1227,6 @@ static void error_record_engine_registers(struct i915_gpu_state *error, intel_engine_get_instdone(engine, &ee->instdone); - ee->waiting = intel_engine_has_waiter(engine); ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); ee->acthd = intel_engine_get_active_head(engine); ee->seqno = intel_engine_get_seqno(engine); @@ -1367,6 +1300,7 @@ static void record_request(struct i915_request *request, { struct i915_gem_context *ctx = request->gem_context; + erq->flags = request->fence.flags; erq->context = ctx->hw_id; erq->sched_attr = request->sched.attr; erq->ban_score = atomic_read(&ctx->ban_score); @@ -1542,7 +1476,6 @@ static void gem_record_rings(struct i915_gpu_state *error) ee->engine_id = i; error_record_engine_registers(error, engine, ee); - error_record_engine_waiters(engine, ee); error_record_engine_execlists(engine, ee); request = i915_gem_find_active_request(engine); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 231173786eae..74757c424aab 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -82,8 +82,6 @@ struct i915_gpu_state { int engine_id; /* Software tracked state */ bool idle; - bool waiting; - int num_waiters; unsigned long hangcheck_timestamp; struct i915_address_space *vm; int num_requests; @@ -147,6 +145,7 @@ struct i915_gpu_state { struct drm_i915_error_object *default_state; struct drm_i915_error_request { + 
unsigned long flags; long jiffies; pid_t pid; u32 context; @@ -159,12 +158,6 @@ struct i915_gpu_state { } *requests, execlist[EXECLIST_MAX_PORTS]; unsigned int num_ports; - struct drm_i915_error_waiter { - char comm[TASK_COMM_LEN]; - pid_t pid; - u32 seqno; - } *waiters; - struct { u32 gfx_mode; union { diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 0fcdb14c50f4..eab085686a2a 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1169,66 +1169,6 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) return; } -static void notify_ring(struct intel_engine_cs *engine) -{ - const u32 seqno = intel_engine_get_seqno(engine); - struct i915_request *rq = NULL; - struct task_struct *tsk = NULL; - struct intel_wait *wait; - - if (unlikely(!engine->breadcrumbs.irq_armed)) - return; - - rcu_read_lock(); - - spin_lock(&engine->breadcrumbs.irq_lock); - wait = engine->breadcrumbs.irq_wait; - if (wait) { - /* - * We use a callback from the dma-fence to submit - * requests after waiting on our own requests. To - * ensure minimum delay in queuing the next request to - * hardware, signal the fence now rather than wait for - * the signaler to be woken up. We still wake up the - * waiter in order to handle the irq-seqno coherency - * issues (we may receive the interrupt before the - * seqno is written, see __i915_request_irq_complete()) - * and to handle coalescing of multiple seqno updates - * and many waiters. - */ - if (i915_seqno_passed(seqno, wait->seqno)) { - struct i915_request *waiter = wait->request; - - if (waiter && - !i915_request_signaled(waiter) && - intel_wait_check_request(wait, waiter)) - rq = i915_request_get(waiter); - - tsk = wait->tsk; - } - - engine->breadcrumbs.irq_count++; - } else { - if (engine->breadcrumbs.irq_armed) - __intel_engine_disarm_breadcrumbs(engine); - } - spin_unlock(&engine->breadcrumbs.irq_lock); - - if (rq) { - spin_lock(&rq->lock); - dma_fence_signal_locked(&rq->fence); - GEM_BUG_ON(!i915_request_completed(rq)); - spin_unlock(&rq->lock); - - i915_request_put(rq); - } - - if (tsk && tsk->state & TASK_NORMAL) - wake_up_process(tsk); - - rcu_read_unlock(); -} - static void vlv_c0_read(struct drm_i915_private *dev_priv, struct intel_rps_ei *ei) { @@ -1473,20 +1413,20 @@ static void ilk_gt_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) { if (gt_iir & GT_RENDER_USER_INTERRUPT) - notify_ring(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); if (gt_iir & ILK_BSD_USER_INTERRUPT) - notify_ring(dev_priv->engine[VCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]); } static void snb_gt_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) { if (gt_iir & GT_RENDER_USER_INTERRUPT) - notify_ring(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); if (gt_iir & GT_BSD_USER_INTERRUPT) - notify_ring(dev_priv->engine[VCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]); if (gt_iir & GT_BLT_USER_INTERRUPT) - notify_ring(dev_priv->engine[BCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[BCS]); if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT | GT_BSD_CS_ERROR_INTERRUPT | @@ -1506,7 +1446,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) tasklet = true; if (iir & GT_RENDER_USER_INTERRUPT) { - notify_ring(engine); + intel_engine_breadcrumbs_irq(engine); tasklet |= USES_GUC_SUBMISSION(engine->i915); } @@ -1852,7 +1792,7 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 
pm_iir) if (HAS_VEBOX(dev_priv)) { if (pm_iir & PM_VEBOX_USER_INTERRUPT) - notify_ring(dev_priv->engine[VECS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[VECS]); if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); @@ -4276,7 +4216,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) I915_WRITE16(IIR, iir); if (iir & I915_USER_INTERRUPT) - notify_ring(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); if (iir & I915_MASTER_ERROR_INTERRUPT) i8xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4384,7 +4324,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) I915_WRITE(IIR, iir); if (iir & I915_USER_INTERRUPT) - notify_ring(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); if (iir & I915_MASTER_ERROR_INTERRUPT) i9xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4529,10 +4469,10 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) I915_WRITE(IIR, iir); if (iir & I915_USER_INTERRUPT) - notify_ring(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); if (iir & I915_BSD_USER_INTERRUPT) - notify_ring(dev_priv->engine[VCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]); if (iir & I915_MASTER_ERROR_INTERRUPT) i9xx_error_irq_handler(dev_priv, eir, eir_stuck); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 7db15b7b3de8..9ed5baf157a3 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -60,7 +60,7 @@ static bool i915_fence_signaled(struct dma_fence *fence) static bool i915_fence_enable_signaling(struct dma_fence *fence) { - return intel_engine_enable_signaling(to_request(fence), true); + return i915_request_enable_breadcrumb(to_request(fence)); } static signed long i915_fence_wait(struct dma_fence *fence, @@ -203,7 +203,7 @@ static void __retire_engine_request(struct intel_engine_cs *engine, if (!i915_request_signaled(rq)) dma_fence_signal_locked(&rq->fence); if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) - intel_engine_cancel_signaling(rq); + i915_request_cancel_breadcrumb(rq); if (rq->waitboost) { GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters)); atomic_dec(&rq->i915->gt_pm.rps.num_waiters); @@ -377,9 +377,12 @@ void __i915_request_submit(struct i915_request *request) /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); + GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); + set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); request->global_seqno = seqno; - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) - intel_engine_enable_signaling(request, false); + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && + !i915_request_enable_breadcrumb(request)) + intel_engine_queue_breadcrumbs(engine); spin_unlock(&request->lock); engine->emit_fini_breadcrumb(request, @@ -389,8 +392,6 @@ void __i915_request_submit(struct i915_request *request) move_to_timeline(request, &engine->timeline); trace_i915_request_execute(request); - - wake_up_all(&request->execute); } void i915_request_submit(struct i915_request *request) @@ -433,7 +434,9 @@ void __i915_request_unsubmit(struct i915_request *request) spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); request->global_seqno = 0; if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) - intel_engine_cancel_signaling(request); + 
i915_request_cancel_breadcrumb(request); + GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); + clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); spin_unlock(&request->lock); /* Transfer back from the global per-engine timeline to per-context */ @@ -633,13 +636,11 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) /* We bump the ref for the fence chain */ i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify); - init_waitqueue_head(&rq->execute); i915_sched_node_init(&rq->sched); /* No zalloc, must clear what we need by hand */ rq->global_seqno = 0; - rq->signaling.wait.seqno = 0; rq->file_priv = NULL; rq->batch = NULL; rq->capture_list = NULL; @@ -1030,13 +1031,10 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu) return this_cpu != cpu; } -static bool __i915_spin_request(const struct i915_request *rq, - u32 seqno, int state, unsigned long timeout_us) +static bool __i915_spin_request(const struct i915_request * const rq, + int state, unsigned long timeout_us) { - struct intel_engine_cs *engine = rq->engine; - unsigned int irq, cpu; - - GEM_BUG_ON(!seqno); + unsigned int cpu; /* * Only wait for the request if we know it is likely to complete. @@ -1044,12 +1042,12 @@ static bool __i915_spin_request(const struct i915_request *rq, * We don't track the timestamps around requests, nor the average * request length, so we do not have a good indicator that this * request will complete within the timeout. What we do know is the - * order in which requests are executed by the engine and so we can - * tell if the request has started. If the request hasn't started yet, - * it is a fair assumption that it will not complete within our - * relatively short timeout. + * order in which requests are executed by the context and so we can + * tell if the request has been started. If the request is not even + * running yet, it is a fair assumption that it will not complete + * within our relatively short timeout. */ - if (!intel_engine_has_started(engine, seqno)) + if (!i915_request_is_running(rq)) return false; /* @@ -1063,20 +1061,10 @@ static bool __i915_spin_request(const struct i915_request *rq, * takes to sleep on a request, on the order of a microsecond. */ - irq = READ_ONCE(engine->breadcrumbs.irq_count); timeout_us += local_clock_us(&cpu); do { - if (intel_engine_has_completed(engine, seqno)) - return seqno == i915_request_global_seqno(rq); - - /* - * Seqno are meant to be ordered *before* the interrupt. If - * we see an interrupt without a corresponding seqno advance, - * assume we won't see one in the near future but require - * the engine->seqno_barrier() to fixup coherency. - */ - if (READ_ONCE(engine->breadcrumbs.irq_count) != irq) - break; + if (i915_request_completed(rq)) + return true; if (signal_pending_state(state, current)) break; @@ -1090,6 +1078,18 @@ static bool __i915_spin_request(const struct i915_request *rq, return false; } +struct request_wait { + struct dma_fence_cb cb; + struct task_struct *tsk; +}; + +static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct request_wait *wait = container_of(cb, typeof(*wait), cb); + + wake_up_process(wait->tsk); +} + /** * i915_request_wait - wait until execution of request has finished * @rq: the request to wait upon @@ -1115,8 +1115,7 @@ long i915_request_wait(struct i915_request *rq, { const int state = flags & I915_WAIT_INTERRUPTIBLE ? 
TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; - DEFINE_WAIT_FUNC(exec, default_wake_function); - struct intel_wait wait; + struct request_wait wait; might_sleep(); GEM_BUG_ON(timeout < 0); @@ -1128,85 +1127,42 @@ long i915_request_wait(struct i915_request *rq, return -ETIME; trace_i915_request_wait_begin(rq, flags); - add_wait_queue(&rq->execute, &exec); - intel_wait_init(&wait); + + /* Optimistic short spin before touching IRQs */ + if (__i915_spin_request(rq, state, 5)) + goto out; + if (flags & I915_WAIT_PRIORITY) i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT); -restart: - do { - set_current_state(state); - if (intel_wait_update_request(&wait, rq)) - break; - - if (signal_pending_state(state, current)) { - timeout = -ERESTARTSYS; - goto complete; - } - - if (!timeout) { - timeout = -ETIME; - goto complete; - } - - timeout = io_schedule_timeout(timeout); - } while (1); - - GEM_BUG_ON(!intel_wait_has_seqno(&wait)); - GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); - - /* Optimistic short spin before touching IRQs */ - if (__i915_spin_request(rq, wait.seqno, state, 5)) - goto complete; - - set_current_state(state); - if (intel_engine_add_wait(rq->engine, &wait)) - /* - * In order to check that we haven't missed the interrupt - * as we enabled it, we need to kick ourselves to do a - * coherent check on the seqno before we sleep. - */ - goto wakeup; + wait.tsk = current; + if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake)) + goto out; for (;;) { - if (signal_pending_state(state, current)) { - timeout = -ERESTARTSYS; - break; - } - - if (!timeout) { - timeout = -ETIME; - break; - } - - timeout = io_schedule_timeout(timeout); - - if (intel_wait_complete(&wait) && - intel_wait_check_request(&wait, rq)) - break; - set_current_state(state); -wakeup: if (i915_request_completed(rq)) break; - /* Only spin if we know the GPU is processing this request */ - if (__i915_spin_request(rq, wait.seqno, state, 2)) + if (signal_pending_state(state, current)) { + timeout = -ERESTARTSYS; break; - - if (!intel_wait_check_request(&wait, rq)) { - intel_engine_remove_wait(rq->engine, &wait); - goto restart; } + + if (!timeout) { + timeout = -ETIME; + break; + } + + timeout = io_schedule_timeout(timeout); } - - intel_engine_remove_wait(rq->engine, &wait); -complete: __set_current_state(TASK_RUNNING); - remove_wait_queue(&rq->execute, &exec); - trace_i915_request_wait_end(rq); + dma_fence_remove_callback(&rq->fence, &wait.cb); + +out: + trace_i915_request_wait_end(rq); return timeout; } diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 340d6216791c..3cffb96203b9 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -38,23 +38,34 @@ struct drm_i915_gem_object; struct i915_request; struct i915_timeline; -struct intel_wait { - struct rb_node node; - struct task_struct *tsk; - struct i915_request *request; - u32 seqno; -}; - -struct intel_signal_node { - struct intel_wait wait; - struct list_head link; -}; - struct i915_capture_list { struct i915_capture_list *next; struct i915_vma *vma; }; +enum { + /* + * I915_FENCE_FLAG_ACTIVE - this request is currently submitted to HW. + * + * Set by __i915_request_submit() on handing over to HW, and cleared + * by __i915_request_unsubmit() if we preempt this request. + * + * Finally cleared for consistency on retiring the request, when + * we know the HW is no longer running this request. 
+ * + * See i915_request_is_active() + */ + I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS, + + /* + * I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list + * + * Internal bookkeeping used by the breadcrumb code to track when + * a request is on the various signal_list. + */ + I915_FENCE_FLAG_SIGNAL, +}; + /** * Request queue structure. * @@ -97,7 +108,7 @@ struct i915_request { struct intel_context *hw_context; struct intel_ring *ring; struct i915_timeline *timeline; - struct intel_signal_node signaling; + struct list_head signal_link; /* * The rcu epoch of when this request was allocated. Used to judiciously @@ -116,7 +127,6 @@ struct i915_request { */ struct i915_sw_fence submit; wait_queue_entry_t submitq; - wait_queue_head_t execute; /* * A list of everyone we wait upon, and everyone who waits upon us. @@ -255,7 +265,7 @@ i915_request_put(struct i915_request *rq) * that it has passed the global seqno and the global seqno is unchanged * after the read, it is indeed complete). */ -static u32 +static inline u32 i915_request_global_seqno(const struct i915_request *request) { return READ_ONCE(request->global_seqno); @@ -277,6 +287,10 @@ void i915_request_skip(struct i915_request *request, int error); void __i915_request_unsubmit(struct i915_request *request); void i915_request_unsubmit(struct i915_request *request); +/* Note: part of the intel_breadcrumbs family */ +bool i915_request_enable_breadcrumb(struct i915_request *request); +void i915_request_cancel_breadcrumb(struct i915_request *request); + long i915_request_wait(struct i915_request *rq, unsigned int flags, long timeout) @@ -293,6 +307,11 @@ static inline bool i915_request_signaled(const struct i915_request *rq) return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags); } +static inline bool i915_request_is_active(const struct i915_request *rq) +{ + return test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); +} + /** * Returns true if seq1 is later than seq2. */ @@ -330,6 +349,11 @@ static inline u32 hwsp_seqno(const struct i915_request *rq) return seqno; } +static inline bool __i915_request_has_started(const struct i915_request *rq) +{ + return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1); +} + /** * i915_request_started - check if the request has begun being executed * @rq: the request @@ -345,7 +369,23 @@ static inline bool i915_request_started(const struct i915_request *rq) return true; /* Remember: started but may have since been preempted! */ - return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1); + return __i915_request_has_started(rq); +} + +/** + * i915_request_is_running - check if the request may actually be executing + * @rq: the request + * + * Returns true if the request is currently submitted to hardware, has passed + * its start point (i.e. the context is setup and not busywaiting). Note that + * it may no longer be running by the time the function returns! 
+ */ +static inline bool i915_request_is_running(const struct i915_request *rq) +{ + if (!i915_request_is_active(rq)) + return false; + + return __i915_request_has_started(rq); } static inline bool i915_request_completed(const struct i915_request *rq) diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index acf3c777e49d..4462007a681c 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -29,7 +29,7 @@ static void engine_skip_context(struct i915_request *rq) spin_lock(&timeline->lock); - if (rq->global_seqno) { + if (i915_request_is_active(rq)) { list_for_each_entry_continue(rq, &engine->timeline.requests, link) if (rq->gem_context == hung_ctx) @@ -751,18 +751,20 @@ static void reset_restart(struct drm_i915_private *i915) static void nop_submit_request(struct i915_request *request) { + struct intel_engine_cs *engine = request->engine; unsigned long flags; GEM_TRACE("%s fence %llx:%lld -> -EIO\n", - request->engine->name, - request->fence.context, request->fence.seqno); + engine->name, request->fence.context, request->fence.seqno); dma_fence_set_error(&request->fence, -EIO); - spin_lock_irqsave(&request->engine->timeline.lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); __i915_request_submit(request); i915_request_mark_complete(request); - intel_engine_write_global_seqno(request->engine, request->global_seqno); - spin_unlock_irqrestore(&request->engine->timeline.lock, flags); + intel_engine_write_global_seqno(engine, request->global_seqno); + spin_unlock_irqrestore(&engine->timeline.lock, flags); + + intel_engine_queue_breadcrumbs(engine); } void i915_gem_set_wedged(struct drm_i915_private *i915) @@ -817,7 +819,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) for_each_engine(engine, i915, id) { reset_finish_engine(engine); - intel_engine_wakeup(engine); + intel_engine_signal_breadcrumbs(engine); } smp_mb__before_atomic(); diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 2d172991024f..d01683167c77 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -243,7 +243,7 @@ static bool inflight(const struct i915_request *rq, { const struct i915_request *active; - if (!rq->global_seqno) + if (!i915_request_is_active(rq)) return false; active = port_request(engine->execlists.port); diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index b58915b8708b..b0795b0ad227 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -29,46 +29,147 @@ #define task_asleep(tsk) ((tsk)->state & TASK_NORMAL && !(tsk)->on_rq) -static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b) +static void irq_enable(struct intel_engine_cs *engine) { - struct intel_wait *wait; - unsigned int result = 0; + if (!engine->irq_enable) + return; + /* Caller disables interrupts */ + spin_lock(&engine->i915->irq_lock); + engine->irq_enable(engine); + spin_unlock(&engine->i915->irq_lock); +} + +static void irq_disable(struct intel_engine_cs *engine) +{ + if (!engine->irq_disable) + return; + + /* Caller disables interrupts */ + spin_lock(&engine->i915->irq_lock); + engine->irq_disable(engine); + spin_unlock(&engine->i915->irq_lock); +} + +static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) +{ lockdep_assert_held(&b->irq_lock); - wait = b->irq_wait; - if (wait) { - /* - * N.B. 
Since task_asleep() and ttwu are not atomic, the - * waiter may actually go to sleep after the check, causing - * us to suppress a valid wakeup. We prefer to reduce the - * number of false positive missed_breadcrumb() warnings - * at the expense of a few false negatives, as it it easy - * to trigger a false positive under heavy load. Enough - * signal should remain from genuine missed_breadcrumb() - * for us to detect in CI. - */ - bool was_asleep = task_asleep(wait->tsk); + GEM_BUG_ON(!b->irq_enabled); + if (!--b->irq_enabled) + irq_disable(container_of(b, + struct intel_engine_cs, + breadcrumbs)); - result = ENGINE_WAKEUP_WAITER; - if (wake_up_process(wait->tsk) && was_asleep) - result |= ENGINE_WAKEUP_ASLEEP; + b->irq_armed = false; +} + +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + if (!b->irq_armed) + return; + + spin_lock_irq(&b->irq_lock); + if (b->irq_armed) + __intel_breadcrumbs_disarm_irq(b); + spin_unlock_irq(&b->irq_lock); +} + +static inline bool __request_completed(const struct i915_request *rq) +{ + return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno); +} + +bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + struct intel_context *ce, *cn; + struct list_head *pos, *next; + LIST_HEAD(signal); + + spin_lock(&b->irq_lock); + + b->irq_fired = true; + if (b->irq_armed && list_empty(&b->signalers)) + __intel_breadcrumbs_disarm_irq(b); + + list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) { + GEM_BUG_ON(list_empty(&ce->signals)); + + list_for_each_safe(pos, next, &ce->signals) { + struct i915_request *rq = + list_entry(pos, typeof(*rq), signal_link); + + if (!__request_completed(rq)) + break; + + GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, + &rq->fence.flags)); + clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + + /* + * We may race with direct invocation of + * dma_fence_signal(), e.g. i915_request_retire(), + * in which case we can skip processing it ourselves. + */ + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &rq->fence.flags)) + continue; + + /* + * Queue for execution after dropping the signaling + * spinlock as the callback chain may end up adding + * more signalers to the same context or engine. + */ + i915_request_get(rq); + list_add_tail(&rq->signal_link, &signal); + } + + /* + * We process the list deletion in bulk, only using a list_add + * (not list_move) above but keeping the status of + * rq->signal_link known with the I915_FENCE_FLAG_SIGNAL bit. 
+ */ + if (!list_is_first(pos, &ce->signals)) { + /* Advance the list to the first incomplete request */ + __list_del_many(&ce->signals, pos); + if (&ce->signals == pos) /* now empty */ + list_del_init(&ce->signal_link); + } } + spin_unlock(&b->irq_lock); + + list_for_each_safe(pos, next, &signal) { + struct i915_request *rq = + list_entry(pos, typeof(*rq), signal_link); + + dma_fence_signal(&rq->fence); + i915_request_put(rq); + } + + return !list_empty(&signal); +} + +bool intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine) +{ + bool result; + + local_irq_disable(); + result = intel_engine_breadcrumbs_irq(engine); + local_irq_enable(); + return result; } -unsigned int intel_engine_wakeup(struct intel_engine_cs *engine) +static void signal_irq_work(struct irq_work *work) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; - unsigned long flags; - unsigned int result; + struct intel_engine_cs *engine = + container_of(work, typeof(*engine), breadcrumbs.irq_work); - spin_lock_irqsave(&b->irq_lock, flags); - result = __intel_breadcrumbs_wakeup(b); - spin_unlock_irqrestore(&b->irq_lock, flags); - - return result; + intel_engine_breadcrumbs_irq(engine); } static unsigned long wait_timeout(void) @@ -94,19 +195,15 @@ static void intel_breadcrumbs_hangcheck(struct timer_list *t) struct intel_engine_cs *engine = from_timer(engine, t, breadcrumbs.hangcheck); struct intel_breadcrumbs *b = &engine->breadcrumbs; - unsigned int irq_count; if (!b->irq_armed) return; - irq_count = READ_ONCE(b->irq_count); - if (b->hangcheck_interrupts != irq_count) { - b->hangcheck_interrupts = irq_count; - mod_timer(&b->hangcheck, wait_timeout()); - return; - } + if (b->irq_fired) + goto rearm; - /* We keep the hangcheck timer alive until we disarm the irq, even + /* + * We keep the hangcheck timer alive until we disarm the irq, even * if there are no waiters at present. * * If the waiter was currently running, assume it hasn't had a chance @@ -118,10 +215,13 @@ static void intel_breadcrumbs_hangcheck(struct timer_list *t) * but we still have a waiter. Assuming all batches complete within * DRM_I915_HANGCHECK_JIFFIES [1.5s]! */ - if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) { + synchronize_hardirq(engine->i915->drm.irq); + if (intel_engine_signal_breadcrumbs(engine)) { missed_breadcrumb(engine); mod_timer(&b->fake_irq, jiffies + 1); } else { +rearm: + b->irq_fired = false; mod_timer(&b->hangcheck, wait_timeout()); } } @@ -140,11 +240,7 @@ static void intel_breadcrumbs_fake_irq(struct timer_list *t) * oldest waiter to do the coherent seqno check. 
*/ - spin_lock_irq(&b->irq_lock); - if (b->irq_armed && !__intel_breadcrumbs_wakeup(b)) - __intel_engine_disarm_breadcrumbs(engine); - spin_unlock_irq(&b->irq_lock); - if (!b->irq_armed) + if (!intel_engine_signal_breadcrumbs(engine) && !b->irq_armed) return; /* If the user has disabled the fake-irq, restore the hangchecking */ @@ -156,43 +252,6 @@ static void intel_breadcrumbs_fake_irq(struct timer_list *t) mod_timer(&b->fake_irq, jiffies + 1); } -static void irq_enable(struct intel_engine_cs *engine) -{ - if (!engine->irq_enable) - return; - - /* Caller disables interrupts */ - spin_lock(&engine->i915->irq_lock); - engine->irq_enable(engine); - spin_unlock(&engine->i915->irq_lock); -} - -static void irq_disable(struct intel_engine_cs *engine) -{ - if (!engine->irq_disable) - return; - - /* Caller disables interrupts */ - spin_lock(&engine->i915->irq_lock); - engine->irq_disable(engine); - spin_unlock(&engine->i915->irq_lock); -} - -void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - lockdep_assert_held(&b->irq_lock); - GEM_BUG_ON(b->irq_wait); - GEM_BUG_ON(!b->irq_armed); - - GEM_BUG_ON(!b->irq_enabled); - if (!--b->irq_enabled) - irq_disable(engine); - - b->irq_armed = false; -} - void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; @@ -215,40 +274,6 @@ void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine) spin_unlock_irq(&b->irq_lock); } -void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct intel_wait *wait, *n; - - if (!b->irq_armed) - return; - - /* - * We only disarm the irq when we are idle (all requests completed), - * so if the bottom-half remains asleep, it missed the request - * completion. - */ - if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) - missed_breadcrumb(engine); - - spin_lock_irq(&b->rb_lock); - - spin_lock(&b->irq_lock); - b->irq_wait = NULL; - if (b->irq_armed) - __intel_engine_disarm_breadcrumbs(engine); - spin_unlock(&b->irq_lock); - - rbtree_postorder_for_each_entry_safe(wait, n, &b->waiters, node) { - GEM_BUG_ON(!intel_engine_signaled(engine, wait->seqno)); - RB_CLEAR_NODE(&wait->node); - wake_up_process(wait->tsk); - } - b->waiters = RB_ROOT; - - spin_unlock_irq(&b->rb_lock); -} - static bool use_fake_irq(const struct intel_breadcrumbs *b) { const struct intel_engine_cs *engine = @@ -264,7 +289,7 @@ static bool use_fake_irq(const struct intel_breadcrumbs *b) * engine->seqno_barrier(), a timing error that should be transient * and unlikely to reoccur. 
*/ - return READ_ONCE(b->irq_count) == b->hangcheck_interrupts; + return !b->irq_fired; } static void enable_fake_irq(struct intel_breadcrumbs *b) @@ -276,7 +301,7 @@ static void enable_fake_irq(struct intel_breadcrumbs *b) mod_timer(&b->hangcheck, wait_timeout()); } -static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) +static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) { struct intel_engine_cs *engine = container_of(b, struct intel_engine_cs, breadcrumbs); @@ -315,488 +340,17 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) return enabled; } -static inline struct intel_wait *to_wait(struct rb_node *node) -{ - return rb_entry(node, struct intel_wait, node); -} - -static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b, - struct intel_wait *wait) -{ - lockdep_assert_held(&b->rb_lock); - GEM_BUG_ON(b->irq_wait == wait); - - /* - * This request is completed, so remove it from the tree, mark it as - * complete, and *then* wake up the associated task. N.B. when the - * task wakes up, it will find the empty rb_node, discern that it - * has already been removed from the tree and skip the serialisation - * of the b->rb_lock and b->irq_lock. This means that the destruction - * of the intel_wait is not serialised with the interrupt handler - * by the waiter - it must instead be serialised by the caller. - */ - rb_erase(&wait->node, &b->waiters); - RB_CLEAR_NODE(&wait->node); - - if (wait->tsk->state != TASK_RUNNING) - wake_up_process(wait->tsk); /* implicit smp_wmb() */ -} - -static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine, - struct rb_node *next) +void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - spin_lock(&b->irq_lock); - GEM_BUG_ON(!b->irq_armed); - GEM_BUG_ON(!b->irq_wait); - b->irq_wait = to_wait(next); - spin_unlock(&b->irq_lock); - - /* We always wake up the next waiter that takes over as the bottom-half - * as we may delegate not only the irq-seqno barrier to the next waiter - * but also the task of waking up concurrent waiters. - */ - if (next) - wake_up_process(to_wait(next)->tsk); -} - -static bool __intel_engine_add_wait(struct intel_engine_cs *engine, - struct intel_wait *wait) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct rb_node **p, *parent, *completed; - bool first, armed; - u32 seqno; - - GEM_BUG_ON(!wait->seqno); - - /* Insert the request into the retirement ordered list - * of waiters by walking the rbtree. If we are the oldest - * seqno in the tree (the first to be retired), then - * set ourselves as the bottom-half. - * - * As we descend the tree, prune completed branches since we hold the - * spinlock we know that the first_waiter must be delayed and can - * reduce some of the sequential wake up latency if we take action - * ourselves and wake up the completed tasks in parallel. Also, by - * removing stale elements in the tree, we may be able to reduce the - * ping-pong between the old bottom-half and ourselves as first-waiter. - */ - armed = false; - first = true; - parent = NULL; - completed = NULL; - seqno = intel_engine_get_seqno(engine); - - /* If the request completed before we managed to grab the spinlock, - * return now before adding ourselves to the rbtree. We let the - * current bottom-half handle any pending wakeups and instead - * try and get out of the way quickly. 
- */ - if (i915_seqno_passed(seqno, wait->seqno)) { - RB_CLEAR_NODE(&wait->node); - return first; - } - - p = &b->waiters.rb_node; - while (*p) { - parent = *p; - if (wait->seqno == to_wait(parent)->seqno) { - /* We have multiple waiters on the same seqno, select - * the highest priority task (that with the smallest - * task->prio) to serve as the bottom-half for this - * group. - */ - if (wait->tsk->prio > to_wait(parent)->tsk->prio) { - p = &parent->rb_right; - first = false; - } else { - p = &parent->rb_left; - } - } else if (i915_seqno_passed(wait->seqno, - to_wait(parent)->seqno)) { - p = &parent->rb_right; - if (i915_seqno_passed(seqno, to_wait(parent)->seqno)) - completed = parent; - else - first = false; - } else { - p = &parent->rb_left; - } - } - rb_link_node(&wait->node, parent, p); - rb_insert_color(&wait->node, &b->waiters); - - if (first) { - spin_lock(&b->irq_lock); - b->irq_wait = wait; - /* After assigning ourselves as the new bottom-half, we must - * perform a cursory check to prevent a missed interrupt. - * Either we miss the interrupt whilst programming the hardware, - * or if there was a previous waiter (for a later seqno) they - * may be woken instead of us (due to the inherent race - * in the unlocked read of b->irq_seqno_bh in the irq handler) - * and so we miss the wake up. - */ - armed = __intel_breadcrumbs_enable_irq(b); - spin_unlock(&b->irq_lock); - } - - if (completed) { - /* Advance the bottom-half (b->irq_wait) before we wake up - * the waiters who may scribble over their intel_wait - * just as the interrupt handler is dereferencing it via - * b->irq_wait. - */ - if (!first) { - struct rb_node *next = rb_next(completed); - GEM_BUG_ON(next == &wait->node); - __intel_breadcrumbs_next(engine, next); - } - - do { - struct intel_wait *crumb = to_wait(completed); - completed = rb_prev(completed); - __intel_breadcrumbs_finish(b, crumb); - } while (completed); - } - - GEM_BUG_ON(!b->irq_wait); - GEM_BUG_ON(!b->irq_armed); - GEM_BUG_ON(rb_first(&b->waiters) != &b->irq_wait->node); - - return armed; -} - -bool intel_engine_add_wait(struct intel_engine_cs *engine, - struct intel_wait *wait) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - bool armed; - - spin_lock_irq(&b->rb_lock); - armed = __intel_engine_add_wait(engine, wait); - spin_unlock_irq(&b->rb_lock); - if (armed) - return armed; - - /* Make the caller recheck if its request has already started. */ - return intel_engine_has_started(engine, wait->seqno); -} - -static inline bool chain_wakeup(struct rb_node *rb, int priority) -{ - return rb && to_wait(rb)->tsk->prio <= priority; -} - -static inline int wakeup_priority(struct intel_breadcrumbs *b, - struct task_struct *tsk) -{ - if (tsk == b->signaler) - return INT_MIN; - else - return tsk->prio; -} - -static void __intel_engine_remove_wait(struct intel_engine_cs *engine, - struct intel_wait *wait) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - lockdep_assert_held(&b->rb_lock); - - if (RB_EMPTY_NODE(&wait->node)) - goto out; - - if (b->irq_wait == wait) { - const int priority = wakeup_priority(b, wait->tsk); - struct rb_node *next; - - /* We are the current bottom-half. Find the next candidate, - * the first waiter in the queue on the remaining oldest - * request. As multiple seqnos may complete in the time it - * takes us to wake up and find the next waiter, we have to - * wake up that waiter for it to perform its own coherent - * completion check. 
- */ - next = rb_next(&wait->node); - if (chain_wakeup(next, priority)) { - /* If the next waiter is already complete, - * wake it up and continue onto the next waiter. So - * if have a small herd, they will wake up in parallel - * rather than sequentially, which should reduce - * the overall latency in waking all the completed - * clients. - * - * However, waking up a chain adds extra latency to - * the first_waiter. This is undesirable if that - * waiter is a high priority task. - */ - u32 seqno = intel_engine_get_seqno(engine); - - while (i915_seqno_passed(seqno, to_wait(next)->seqno)) { - struct rb_node *n = rb_next(next); - - __intel_breadcrumbs_finish(b, to_wait(next)); - next = n; - if (!chain_wakeup(next, priority)) - break; - } - } - - __intel_breadcrumbs_next(engine, next); - } else { - GEM_BUG_ON(rb_first(&b->waiters) == &wait->node); - } - - GEM_BUG_ON(RB_EMPTY_NODE(&wait->node)); - rb_erase(&wait->node, &b->waiters); - RB_CLEAR_NODE(&wait->node); - -out: - GEM_BUG_ON(b->irq_wait == wait); - GEM_BUG_ON(rb_first(&b->waiters) != - (b->irq_wait ? &b->irq_wait->node : NULL)); -} - -void intel_engine_remove_wait(struct intel_engine_cs *engine, - struct intel_wait *wait) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - /* Quick check to see if this waiter was already decoupled from - * the tree by the bottom-half to avoid contention on the spinlock - * by the herd. - */ - if (RB_EMPTY_NODE(&wait->node)) { - GEM_BUG_ON(READ_ONCE(b->irq_wait) == wait); - return; - } - - spin_lock_irq(&b->rb_lock); - __intel_engine_remove_wait(engine, wait); - spin_unlock_irq(&b->rb_lock); -} - -static void signaler_set_rtpriority(void) -{ - struct sched_param param = { .sched_priority = 1 }; - - sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m); -} - -static int intel_breadcrumbs_signaler(void *arg) -{ - struct intel_engine_cs *engine = arg; - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct i915_request *rq, *n; - - /* Install ourselves with high priority to reduce signalling latency */ - signaler_set_rtpriority(); - - do { - bool do_schedule = true; - LIST_HEAD(list); - u32 seqno; - - set_current_state(TASK_INTERRUPTIBLE); - if (list_empty(&b->signals)) - goto sleep; - - /* - * We are either woken up by the interrupt bottom-half, - * or by a client adding a new signaller. In both cases, - * the GPU seqno may have advanced beyond our oldest signal. - * If it has, propagate the signal, remove the waiter and - * check again with the next oldest signal. Otherwise we - * need to wait for a new interrupt from the GPU or for - * a new client. 
- */ - seqno = intel_engine_get_seqno(engine); - - spin_lock_irq(&b->rb_lock); - list_for_each_entry_safe(rq, n, &b->signals, signaling.link) { - u32 this = rq->signaling.wait.seqno; - - GEM_BUG_ON(!rq->signaling.wait.seqno); - - if (!i915_seqno_passed(seqno, this)) - break; - - if (likely(this == i915_request_global_seqno(rq))) { - __intel_engine_remove_wait(engine, - &rq->signaling.wait); - - rq->signaling.wait.seqno = 0; - __list_del_entry(&rq->signaling.link); - - if (!i915_request_signaled(rq)) { - list_add_tail(&rq->signaling.link, - &list); - i915_request_get(rq); - } - } - } - spin_unlock_irq(&b->rb_lock); - - if (!list_empty(&list)) { - local_bh_disable(); - list_for_each_entry_safe(rq, n, &list, signaling.link) { - dma_fence_signal(&rq->fence); - GEM_BUG_ON(!i915_request_completed(rq)); - i915_request_put(rq); - } - local_bh_enable(); /* kick start the tasklets */ - - /* - * If the engine is saturated we may be continually - * processing completed requests. This angers the - * NMI watchdog if we never let anything else - * have access to the CPU. Let's pretend to be nice - * and relinquish the CPU if we burn through the - * entire RT timeslice! - */ - do_schedule = need_resched(); - } - - if (unlikely(do_schedule)) { -sleep: - if (kthread_should_park()) - kthread_parkme(); - - if (unlikely(kthread_should_stop())) - break; - - schedule(); - } - } while (1); - __set_current_state(TASK_RUNNING); - - return 0; -} - -static void insert_signal(struct intel_breadcrumbs *b, - struct i915_request *request, - const u32 seqno) -{ - struct i915_request *iter; - - lockdep_assert_held(&b->rb_lock); - - /* - * A reasonable assumption is that we are called to add signals - * in sequence, as the requests are submitted for execution and - * assigned a global_seqno. This will be the case for the majority - * of internally generated signals (inter-engine signaling). - * - * Out of order waiters triggering random signaling enabling will - * be more problematic, but hopefully rare enough and the list - * small enough that the O(N) insertion sort is not an issue. - */ - - list_for_each_entry_reverse(iter, &b->signals, signaling.link) - if (i915_seqno_passed(seqno, iter->signaling.wait.seqno)) - break; - - list_add(&request->signaling.link, &iter->signaling.link); -} - -bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup) -{ - struct intel_engine_cs *engine = request->engine; - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct intel_wait *wait = &request->signaling.wait; - u32 seqno; - - /* - * Note that we may be called from an interrupt handler on another - * device (e.g. nouveau signaling a fence completion causing us - * to submit a request, and so enable signaling). As such, - * we need to make sure that all other users of b->rb_lock protect - * against interrupts, i.e. use spin_lock_irqsave. - */ - - /* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */ - GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&request->lock); - - seqno = i915_request_global_seqno(request); - if (!seqno) /* will be enabled later upon execution */ - return true; - - GEM_BUG_ON(wait->seqno); - wait->tsk = b->signaler; - wait->request = request; - wait->seqno = seqno; - - /* - * Add ourselves into the list of waiters, but registering our - * bottom-half as the signaller thread. As per usual, only the oldest - * waiter (not just signaller) is tasked as the bottom-half waking - * up all completed waiters after the user interrupt. 
- * - * If we are the oldest waiter, enable the irq (after which we - * must double check that the seqno did not complete). - */ - spin_lock(&b->rb_lock); - insert_signal(b, request, seqno); - wakeup &= __intel_engine_add_wait(engine, wait); - spin_unlock(&b->rb_lock); - - if (wakeup) { - wake_up_process(b->signaler); - return !intel_wait_complete(wait); - } - - return true; -} - -void intel_engine_cancel_signaling(struct i915_request *request) -{ - struct intel_engine_cs *engine = request->engine; - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&request->lock); - - if (!READ_ONCE(request->signaling.wait.seqno)) - return; - - spin_lock(&b->rb_lock); - __intel_engine_remove_wait(engine, &request->signaling.wait); - if (fetch_and_zero(&request->signaling.wait.seqno)) - __list_del_entry(&request->signaling.link); - spin_unlock(&b->rb_lock); -} - -int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct task_struct *tsk; - - spin_lock_init(&b->rb_lock); spin_lock_init(&b->irq_lock); + INIT_LIST_HEAD(&b->signalers); + + init_irq_work(&b->irq_work, signal_irq_work); timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0); timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0); - - INIT_LIST_HEAD(&b->signals); - - /* Spawn a thread to provide a common bottom-half for all signals. - * As this is an asynchronous interface we cannot steal the current - * task for handling the bottom-half to the user interrupt, therefore - * we create a thread to do the coherent seqno dance after the - * interrupt and then signal the waitqueue (via the dma-buf/fence). - */ - tsk = kthread_run(intel_breadcrumbs_signaler, engine, - "i915/signal:%d", engine->id); - if (IS_ERR(tsk)) - return PTR_ERR(tsk); - - b->signaler = tsk; - - return 0; } static void cancel_fake_irq(struct intel_engine_cs *engine) @@ -832,19 +386,103 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - /* The engines should be idle and all requests accounted for! */ - WARN_ON(READ_ONCE(b->irq_wait)); - WARN_ON(!RB_EMPTY_ROOT(&b->waiters)); - WARN_ON(!list_empty(&b->signals)); - - if (!IS_ERR_OR_NULL(b->signaler)) - kthread_stop(b->signaler); - cancel_fake_irq(engine); } -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/intel_breadcrumbs.c" -#endif +bool i915_request_enable_breadcrumb(struct i915_request *rq) +{ + struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; + + GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)); + + if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) + return true; + + spin_lock(&b->irq_lock); + if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags) && + !__request_completed(rq)) { + struct intel_context *ce = rq->hw_context; + struct list_head *pos; + + __intel_breadcrumbs_arm_irq(b); + + /* + * We keep the seqno in retirement order, so we can break + * inside intel_engine_breadcrumbs_irq as soon as we've passed + * the last completed request (or seen a request that hasn't + * even started). We could iterate the timeline->requests list, + * but keeping a separate signalers_list has the advantage of + * hopefully being much smaller than the full list and so + * provides faster iteration and detection when there are no + * more interrupts required for this context.
+ * + * We typically expect to add new signalers in order, so we + * start looking for our insertion point from the tail of + * the list. + */ + list_for_each_prev(pos, &ce->signals) { + struct i915_request *it = + list_entry(pos, typeof(*it), signal_link); + + if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno)) + break; + } + list_add(&rq->signal_link, pos); + if (pos == &ce->signals) /* catch transitions from empty list */ + list_move_tail(&ce->signal_link, &b->signalers); + + set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + } + spin_unlock(&b->irq_lock); + + return !__request_completed(rq); +} + +void i915_request_cancel_breadcrumb(struct i915_request *rq) +{ + struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; + + if (!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) + return; + + spin_lock(&b->irq_lock); + if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) { + struct intel_context *ce = rq->hw_context; + + list_del(&rq->signal_link); + if (list_empty(&ce->signals)) + list_del_init(&ce->signal_link); + + clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + } + spin_unlock(&b->irq_lock); +} + +void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, + struct drm_printer *p) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + struct intel_context *ce; + struct i915_request *rq; + + if (list_empty(&b->signalers)) + return; + + drm_printf(p, "Signals:\n"); + + spin_lock_irq(&b->irq_lock); + list_for_each_entry(ce, &b->signalers, signal_link) { + list_for_each_entry(rq, &ce->signals, signal_link) { + drm_printf(p, "\t[%llx:%llx%s] @ %dms\n", + rq->fence.context, rq->fence.seqno, + i915_request_completed(rq) ? "!" : + i915_request_started(rq) ? "*" : + "", + jiffies_to_msecs(jiffies - rq->emitted_jiffies)); + } + } + spin_unlock_irq(&b->irq_lock); + + if (test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) + drm_printf(p, "Fake irq active\n"); +} diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 0a610c9691fd..71c01eb13af1 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -458,12 +458,6 @@ cleanup: void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno) { intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); - - /* After manually advancing the seqno, fake the interrupt in case - * there are any waiters for that seqno. 
- */ - intel_engine_wakeup(engine); - GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno); } @@ -607,6 +601,7 @@ int intel_engine_setup_common(struct intel_engine_cs *engine) i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); + intel_engine_init_breadcrumbs(engine); intel_engine_init_execlist(engine); intel_engine_init_hangcheck(engine); intel_engine_init_batch_pool(engine); @@ -717,20 +712,14 @@ int intel_engine_init_common(struct intel_engine_cs *engine) } } - ret = intel_engine_init_breadcrumbs(engine); - if (ret) - goto err_unpin_preempt; - ret = measure_breadcrumb_dw(engine); if (ret < 0) - goto err_breadcrumbs; + goto err_unpin_preempt; engine->emit_fini_breadcrumb_dw = ret; return 0; -err_breadcrumbs: - intel_engine_fini_breadcrumbs(engine); err_unpin_preempt: if (i915->preempt_context) __intel_context_unpin(i915->preempt_context, engine); @@ -1294,12 +1283,14 @@ static void print_request(struct drm_printer *m, x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf)); - drm_printf(m, "%s%x%s [%llx:%llx]%s @ %dms: %s\n", + drm_printf(m, "%s%x%s%s [%llx:%llx]%s @ %dms: %s\n", prefix, rq->global_seqno, i915_request_completed(rq) ? "!" : i915_request_started(rq) ? "*" : "", + test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &rq->fence.flags) ? "+" : "", rq->fence.context, rq->fence.seqno, buf, jiffies_to_msecs(jiffies - rq->emitted_jiffies), @@ -1492,12 +1483,9 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...) { - struct intel_breadcrumbs * const b = &engine->breadcrumbs; struct i915_gpu_error * const error = &engine->i915->gpu_error; struct i915_request *rq; intel_wakeref_t wakeref; - unsigned long flags; - struct rb_node *rb; if (header) { va_list ap; @@ -1565,21 +1553,12 @@ void intel_engine_dump(struct intel_engine_cs *engine, intel_execlists_show_requests(engine, m, print_request, 8); - spin_lock_irqsave(&b->rb_lock, flags); - for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = rb_entry(rb, typeof(*w), node); - - drm_printf(m, "\t%s [%d:%c] waiting for %x\n", - w->tsk->comm, w->tsk->pid, - task_state_to_char(w->tsk), - w->seqno); - } - spin_unlock_irqrestore(&b->rb_lock, flags); - drm_printf(m, "HWSP:\n"); hexdump(m, engine->status_page.addr, PAGE_SIZE); drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine))); + + intel_engine_print_breadcrumbs(engine, m); } static u8 user_class_map[] = { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 668ed67336a2..b889b27f8aeb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -743,7 +743,7 @@ static int init_ring_common(struct intel_engine_cs *engine) } /* Papering over lost _interrupts_ immediately following the restart */ - intel_engine_wakeup(engine); + intel_engine_queue_breadcrumbs(engine); out: intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 953ccc2617ff..71f8ceb937ff 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -5,6 +5,7 @@ #include #include +#include #include #include "i915_gem_batch_pool.h" @@ -381,22 +382,19 @@ struct intel_engine_cs { * the overhead of waking that client is much preferred. 
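+	 *
+	 * With the irq_work rework below, signaling is driven from the
+	 * user interrupt (or from anyone else kicking
+	 * intel_engine_queue_breadcrumbs()), so fences are signaled
+	 * without a dedicated signaler kthread.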
*/ struct intel_breadcrumbs { - spinlock_t irq_lock; /* protects irq_*; irqsafe */ - struct intel_wait *irq_wait; /* oldest waiter by retirement */ + spinlock_t irq_lock; + struct list_head signalers; - spinlock_t rb_lock; /* protects the rb and wraps irq_lock */ - struct rb_root waiters; /* sorted by retirement, priority */ - struct list_head signals; /* sorted by retirement */ - struct task_struct *signaler; /* used for fence signalling */ + struct irq_work irq_work; /* for use from inside irq_lock */ struct timer_list fake_irq; /* used after a missed interrupt */ struct timer_list hangcheck; /* detect missed interrupts */ unsigned int hangcheck_interrupts; unsigned int irq_enabled; - unsigned int irq_count; - bool irq_armed : 1; + bool irq_armed; + bool irq_fired; } breadcrumbs; struct { @@ -885,83 +883,29 @@ static inline bool intel_engine_has_started(struct intel_engine_cs *engine, void intel_engine_get_instdone(struct intel_engine_cs *engine, struct intel_instdone *instdone); -/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ -int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); - -static inline void intel_wait_init(struct intel_wait *wait) -{ - wait->tsk = current; - wait->request = NULL; -} - -static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno) -{ - wait->tsk = current; - wait->seqno = seqno; -} - -static inline bool intel_wait_has_seqno(const struct intel_wait *wait) -{ - return wait->seqno; -} - -static inline bool -intel_wait_update_seqno(struct intel_wait *wait, u32 seqno) -{ - wait->seqno = seqno; - return intel_wait_has_seqno(wait); -} - -static inline bool -intel_wait_update_request(struct intel_wait *wait, - const struct i915_request *rq) -{ - return intel_wait_update_seqno(wait, i915_request_global_seqno(rq)); -} - -static inline bool -intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno) -{ - return wait->seqno == seqno; -} - -static inline bool -intel_wait_check_request(const struct intel_wait *wait, - const struct i915_request *rq) -{ - return intel_wait_check_seqno(wait, i915_request_global_seqno(rq)); -} - -static inline bool intel_wait_complete(const struct intel_wait *wait) -{ - return RB_EMPTY_NODE(&wait->node); -} - -bool intel_engine_add_wait(struct intel_engine_cs *engine, - struct intel_wait *wait); -void intel_engine_remove_wait(struct intel_engine_cs *engine, - struct intel_wait *wait); -bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup); -void intel_engine_cancel_signaling(struct i915_request *request); - -static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) -{ - return READ_ONCE(engine->breadcrumbs.irq_wait); -} - -unsigned int intel_engine_wakeup(struct intel_engine_cs *engine); -#define ENGINE_WAKEUP_WAITER BIT(0) -#define ENGINE_WAKEUP_ASLEEP BIT(1) +void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine); void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine); -void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); +bool intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); +static inline void +intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine) +{ + irq_work_queue(&engine->breadcrumbs.irq_work); +} + +bool intel_engine_breadcrumbs_irq(struct 
intel_engine_cs *engine); + void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, + struct drm_printer *p); + static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) { memset(batch, 0, 6 * sizeof(u32)); diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 4a83a1c6c406..88e5ab586337 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -15,7 +15,6 @@ selftest(scatterlist, scatterlist_mock_selftests) selftest(syncmap, i915_syncmap_mock_selftests) selftest(uncore, intel_uncore_mock_selftests) selftest(engine, intel_engine_cs_mock_selftests) -selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) selftest(timelines, i915_timeline_mock_selftests) selftest(requests, i915_request_mock_selftests) selftest(objects, i915_gem_object_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 4d4b86b5fa11..6733dc5b6b4c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -25,9 +25,12 @@ #include #include "../i915_selftest.h" +#include "i915_random.h" #include "igt_live_test.h" +#include "lib_sw_fence.h" #include "mock_context.h" +#include "mock_drm.h" #include "mock_gem_device.h" static int igt_add_request(void *arg) @@ -247,6 +250,254 @@ err_context_0: return err; } +struct smoketest { + struct intel_engine_cs *engine; + struct i915_gem_context **contexts; + atomic_long_t num_waits, num_fences; + int ncontexts, max_batch; + struct i915_request *(*request_alloc)(struct i915_gem_context *, + struct intel_engine_cs *); +}; + +static struct i915_request * +__mock_request_alloc(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + return mock_request(engine, ctx, 0); +} + +static struct i915_request * +__live_request_alloc(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + return i915_request_alloc(engine, ctx); +} + +static int __igt_breadcrumbs_smoketest(void *arg) +{ + struct smoketest *t = arg; + struct mutex * const BKL = &t->engine->i915->drm.struct_mutex; + const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1; + const unsigned int total = 4 * t->ncontexts + 1; + unsigned int num_waits = 0, num_fences = 0; + struct i915_request **requests; + I915_RND_STATE(prng); + unsigned int *order; + int err = 0; + + /* + * A very simple test to catch the most egregious of list handling bugs. + * + * At its heart, we simply create oodles of requests running across + * multiple kthreads and enable signaling on them, for the sole purpose + * of stressing our breadcrumb handling. The only inspection we do is + * that the fences were marked as signaled. 
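+	 *
+	 * Roughly, each pass below: create a submit fence and a wait
+	 * fence, queue a randomly sized batch of requests gated on the
+	 * submit fence, chain each request's dma-fence into the wait
+	 * fence, then release the whole batch at once and check that
+	 * the composite wait fence signals in time.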
+	 */
+
+	requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL);
+	if (!requests)
+		return -ENOMEM;
+
+	order = i915_random_order(total, &prng);
+	if (!order) {
+		err = -ENOMEM;
+		goto out_requests;
+	}
+
+	while (!kthread_should_stop()) {
+		struct i915_sw_fence *submit, *wait;
+		unsigned int n, count;
+
+		submit = heap_fence_create(GFP_KERNEL);
+		if (!submit) {
+			err = -ENOMEM;
+			break;
+		}
+
+		wait = heap_fence_create(GFP_KERNEL);
+		if (!wait) {
+			i915_sw_fence_commit(submit);
+			heap_fence_put(submit);
+			err = -ENOMEM;
+			break;
+		}
+
+		i915_random_reorder(order, total, &prng);
+		count = 1 + i915_prandom_u32_max_state(max_batch, &prng);
+
+		for (n = 0; n < count; n++) {
+			struct i915_gem_context *ctx =
+				t->contexts[order[n] % t->ncontexts];
+			struct i915_request *rq;
+
+			mutex_lock(BKL);
+
+			rq = t->request_alloc(ctx, t->engine);
+			if (IS_ERR(rq)) {
+				mutex_unlock(BKL);
+				err = PTR_ERR(rq);
+				count = n;
+				break;
+			}
+
+			err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
+							       submit,
+							       GFP_KERNEL);
+
+			requests[n] = i915_request_get(rq);
+			i915_request_add(rq);
+
+			mutex_unlock(BKL);
+
+			if (err >= 0)
+				err = i915_sw_fence_await_dma_fence(wait,
+								    &rq->fence,
+								    0,
+								    GFP_KERNEL);
+
+			if (err < 0) {
+				i915_request_put(rq);
+				count = n;
+				break;
+			}
+		}
+
+		i915_sw_fence_commit(submit);
+		i915_sw_fence_commit(wait);
+
+		if (!wait_event_timeout(wait->wait,
+					i915_sw_fence_done(wait),
+					HZ / 2)) {
+			struct i915_request *rq = requests[count - 1];
+
+			pr_err("waiting for %d fences (last %llx:%lld) on %s timed out!\n",
+			       count,
+			       rq->fence.context, rq->fence.seqno,
+			       t->engine->name);
+			i915_gem_set_wedged(t->engine->i915);
+			GEM_BUG_ON(!i915_request_completed(rq));
+			i915_sw_fence_wait(wait);
+			err = -EIO;
+		}
+
+		for (n = 0; n < count; n++) {
+			struct i915_request *rq = requests[n];
+
+			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+				      &rq->fence.flags)) {
+				pr_err("%llu:%llu was not signaled!\n",
+				       rq->fence.context, rq->fence.seqno);
+				err = -EINVAL;
+			}
+
+			i915_request_put(rq);
+		}
+
+		heap_fence_put(wait);
+		heap_fence_put(submit);
+
+		if (err < 0)
+			break;
+
+		num_fences += count;
+		num_waits++;
+
+		cond_resched();
+	}
+
+	atomic_long_add(num_fences, &t->num_fences);
+	atomic_long_add(num_waits, &t->num_waits);
+
+	kfree(order);
+out_requests:
+	kfree(requests);
+	return err;
+}
+
+static int mock_breadcrumbs_smoketest(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct smoketest t = {
+		.engine = i915->engine[RCS],
+		.ncontexts = 1024,
+		.max_batch = 1024,
+		.request_alloc = __mock_request_alloc
+	};
+	unsigned int ncpus = num_online_cpus();
+	struct task_struct **threads;
+	unsigned int n;
+	int ret = 0;
+
+	/*
+	 * Smoketest our breadcrumb/signal handling for requests across multiple
+	 * threads. A very simple test to only catch the most egregious of bugs.
+ * See __igt_breadcrumbs_smoketest(); + */ + + threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL); + if (!threads) + return -ENOMEM; + + t.contexts = + kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL); + if (!t.contexts) { + ret = -ENOMEM; + goto out_threads; + } + + mutex_lock(&t.engine->i915->drm.struct_mutex); + for (n = 0; n < t.ncontexts; n++) { + t.contexts[n] = mock_context(t.engine->i915, "mock"); + if (!t.contexts[n]) { + ret = -ENOMEM; + goto out_contexts; + } + } + mutex_unlock(&t.engine->i915->drm.struct_mutex); + + for (n = 0; n < ncpus; n++) { + threads[n] = kthread_run(__igt_breadcrumbs_smoketest, + &t, "igt/%d", n); + if (IS_ERR(threads[n])) { + ret = PTR_ERR(threads[n]); + ncpus = n; + break; + } + + get_task_struct(threads[n]); + } + + msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); + + for (n = 0; n < ncpus; n++) { + int err; + + err = kthread_stop(threads[n]); + if (err < 0 && !ret) + ret = err; + + put_task_struct(threads[n]); + } + pr_info("Completed %lu waits for %lu fence across %d cpus\n", + atomic_long_read(&t.num_waits), + atomic_long_read(&t.num_fences), + ncpus); + + mutex_lock(&t.engine->i915->drm.struct_mutex); +out_contexts: + for (n = 0; n < t.ncontexts; n++) { + if (!t.contexts[n]) + break; + mock_context_close(t.contexts[n]); + } + mutex_unlock(&t.engine->i915->drm.struct_mutex); + kfree(t.contexts); +out_threads: + kfree(threads); + + return ret; +} + int i915_request_mock_selftests(void) { static const struct i915_subtest tests[] = { @@ -254,6 +505,7 @@ int i915_request_mock_selftests(void) SUBTEST(igt_wait_request), SUBTEST(igt_fence_wait), SUBTEST(igt_request_rewind), + SUBTEST(mock_breadcrumbs_smoketest), }; struct drm_i915_private *i915; intel_wakeref_t wakeref; @@ -812,6 +1064,178 @@ out_unlock: return err; } +static int +max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine) +{ + struct i915_request *rq; + int ret; + + /* + * Before execlists, all contexts share the same ringbuffer. With + * execlists, each context/engine has a separate ringbuffer and + * for the purposes of this test, inexhaustible. + * + * For the global ringbuffer though, we have to be very careful + * that we do not wrap while preventing the execution of requests + * with a unsignaled fence. + */ + if (HAS_EXECLISTS(ctx->i915)) + return INT_MAX; + + rq = i915_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + } else { + int sz; + + ret = rq->ring->size - rq->reserved_space; + i915_request_add(rq); + + sz = rq->ring->emit - rq->head; + if (sz < 0) + sz += rq->ring->size; + ret /= sz; + ret /= 2; /* leave half spare, in case of emergency! */ + } + + return ret; +} + +static int live_breadcrumbs_smoketest(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct smoketest t[I915_NUM_ENGINES]; + unsigned int ncpus = num_online_cpus(); + unsigned long num_waits, num_fences; + struct intel_engine_cs *engine; + struct task_struct **threads; + struct igt_live_test live; + enum intel_engine_id id; + intel_wakeref_t wakeref; + struct drm_file *file; + unsigned int n; + int ret = 0; + + /* + * Smoketest our breadcrumb/signal handling for requests across multiple + * threads. A very simple test to only catch the most egregious of bugs. + * See __igt_breadcrumbs_smoketest(); + * + * On real hardware this time. 
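+	 *
+	 * We spawn one thread per cpu per engine, so request allocation
+	 * under struct_mutex and the per-engine signal lists are all
+	 * exercised concurrently.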
+ */ + + wakeref = intel_runtime_pm_get(i915); + + file = mock_file(i915); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + goto out_rpm; + } + + threads = kcalloc(ncpus * I915_NUM_ENGINES, + sizeof(*threads), + GFP_KERNEL); + if (!threads) { + ret = -ENOMEM; + goto out_file; + } + + memset(&t[0], 0, sizeof(t[0])); + t[0].request_alloc = __live_request_alloc; + t[0].ncontexts = 64; + t[0].contexts = kmalloc_array(t[0].ncontexts, + sizeof(*t[0].contexts), + GFP_KERNEL); + if (!t[0].contexts) { + ret = -ENOMEM; + goto out_threads; + } + + mutex_lock(&i915->drm.struct_mutex); + for (n = 0; n < t[0].ncontexts; n++) { + t[0].contexts[n] = live_context(i915, file); + if (!t[0].contexts[n]) { + ret = -ENOMEM; + goto out_contexts; + } + } + + ret = igt_live_test_begin(&live, i915, __func__, ""); + if (ret) + goto out_contexts; + + for_each_engine(engine, i915, id) { + t[id] = t[0]; + t[id].engine = engine; + t[id].max_batch = max_batches(t[0].contexts[0], engine); + if (t[id].max_batch < 0) { + ret = t[id].max_batch; + mutex_unlock(&i915->drm.struct_mutex); + goto out_flush; + } + /* One ring interleaved between requests from all cpus */ + t[id].max_batch /= num_online_cpus() + 1; + pr_debug("Limiting batches to %d requests on %s\n", + t[id].max_batch, engine->name); + + for (n = 0; n < ncpus; n++) { + struct task_struct *tsk; + + tsk = kthread_run(__igt_breadcrumbs_smoketest, + &t[id], "igt/%d.%d", id, n); + if (IS_ERR(tsk)) { + ret = PTR_ERR(tsk); + mutex_unlock(&i915->drm.struct_mutex); + goto out_flush; + } + + get_task_struct(tsk); + threads[id * ncpus + n] = tsk; + } + } + mutex_unlock(&i915->drm.struct_mutex); + + msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); + +out_flush: + num_waits = 0; + num_fences = 0; + for_each_engine(engine, i915, id) { + for (n = 0; n < ncpus; n++) { + struct task_struct *tsk = threads[id * ncpus + n]; + int err; + + if (!tsk) + continue; + + err = kthread_stop(tsk); + if (err < 0 && !ret) + ret = err; + + put_task_struct(tsk); + } + + num_waits += atomic_long_read(&t[id].num_waits); + num_fences += atomic_long_read(&t[id].num_fences); + } + pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n", + num_waits, num_fences, RUNTIME_INFO(i915)->num_rings, ncpus); + + mutex_lock(&i915->drm.struct_mutex); + ret = igt_live_test_end(&live) ?: ret; +out_contexts: + mutex_unlock(&i915->drm.struct_mutex); + kfree(t[0].contexts); +out_threads: + kfree(threads); +out_file: + mock_file_free(i915, file); +out_rpm: + intel_runtime_pm_put(i915, wakeref); + + return ret; +} + int i915_request_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { @@ -819,6 +1243,7 @@ int i915_request_live_selftests(struct drm_i915_private *i915) SUBTEST(live_all_engines), SUBTEST(live_sequential_engines), SUBTEST(live_empty_request), + SUBTEST(live_breadcrumbs_smoketest), }; if (i915_terminally_wedged(&i915->gpu_error)) diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index 0e70df0230b8..9ebd9225684e 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -185,11 +185,6 @@ void igt_spinner_fini(struct igt_spinner *spin) bool igt_wait_for_spinner(struct igt_spinner *spin, struct i915_request *rq) { - if (!wait_event_timeout(rq->execute, - READ_ONCE(rq->global_seqno), - msecs_to_jiffies(10))) - return false; - return !(wait_for_us(i915_seqno_passed(hws_seqno(spin, rq), rq->fence.seqno), 10) && diff --git 
a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c deleted file mode 100644 index f03b407fdbe2..000000000000 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ /dev/null @@ -1,470 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include "../i915_selftest.h" -#include "i915_random.h" - -#include "mock_gem_device.h" -#include "mock_engine.h" - -static int check_rbtree(struct intel_engine_cs *engine, - const unsigned long *bitmap, - const struct intel_wait *waiters, - const int count) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct rb_node *rb; - int n; - - if (&b->irq_wait->node != rb_first(&b->waiters)) { - pr_err("First waiter does not match first element of wait-tree\n"); - return -EINVAL; - } - - n = find_first_bit(bitmap, count); - for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = container_of(rb, typeof(*w), node); - int idx = w - waiters; - - if (!test_bit(idx, bitmap)) { - pr_err("waiter[%d, seqno=%d] removed but still in wait-tree\n", - idx, w->seqno); - return -EINVAL; - } - - if (n != idx) { - pr_err("waiter[%d, seqno=%d] does not match expected next element in tree [%d]\n", - idx, w->seqno, n); - return -EINVAL; - } - - n = find_next_bit(bitmap, count, n + 1); - } - - return 0; -} - -static int check_completion(struct intel_engine_cs *engine, - const unsigned long *bitmap, - const struct intel_wait *waiters, - const int count) -{ - int n; - - for (n = 0; n < count; n++) { - if (intel_wait_complete(&waiters[n]) != !!test_bit(n, bitmap)) - continue; - - pr_err("waiter[%d, seqno=%d] is %s, but expected %s\n", - n, waiters[n].seqno, - intel_wait_complete(&waiters[n]) ? "complete" : "active", - test_bit(n, bitmap) ? 
"active" : "complete"); - return -EINVAL; - } - - return 0; -} - -static int check_rbtree_empty(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - if (b->irq_wait) { - pr_err("Empty breadcrumbs still has a waiter\n"); - return -EINVAL; - } - - if (!RB_EMPTY_ROOT(&b->waiters)) { - pr_err("Empty breadcrumbs, but wait-tree not empty\n"); - return -EINVAL; - } - - return 0; -} - -static int igt_random_insert_remove(void *arg) -{ - const u32 seqno_bias = 0x1000; - I915_RND_STATE(prng); - struct intel_engine_cs *engine = arg; - struct intel_wait *waiters; - const int count = 4096; - unsigned int *order; - unsigned long *bitmap; - int err = -ENOMEM; - int n; - - mock_engine_reset(engine); - - waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL); - if (!waiters) - goto out_engines; - - bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap), - GFP_KERNEL); - if (!bitmap) - goto out_waiters; - - order = i915_random_order(count, &prng); - if (!order) - goto out_bitmap; - - for (n = 0; n < count; n++) - intel_wait_init_for_seqno(&waiters[n], seqno_bias + n); - - err = check_rbtree(engine, bitmap, waiters, count); - if (err) - goto out_order; - - /* Add and remove waiters into the rbtree in random order. At each - * step, we verify that the rbtree is correctly ordered. - */ - for (n = 0; n < count; n++) { - int i = order[n]; - - intel_engine_add_wait(engine, &waiters[i]); - __set_bit(i, bitmap); - - err = check_rbtree(engine, bitmap, waiters, count); - if (err) - goto out_order; - } - - i915_random_reorder(order, count, &prng); - for (n = 0; n < count; n++) { - int i = order[n]; - - intel_engine_remove_wait(engine, &waiters[i]); - __clear_bit(i, bitmap); - - err = check_rbtree(engine, bitmap, waiters, count); - if (err) - goto out_order; - } - - err = check_rbtree_empty(engine); -out_order: - kfree(order); -out_bitmap: - kfree(bitmap); -out_waiters: - kvfree(waiters); -out_engines: - mock_engine_flush(engine); - return err; -} - -static int igt_insert_complete(void *arg) -{ - const u32 seqno_bias = 0x1000; - struct intel_engine_cs *engine = arg; - struct intel_wait *waiters; - const int count = 4096; - unsigned long *bitmap; - int err = -ENOMEM; - int n, m; - - mock_engine_reset(engine); - - waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL); - if (!waiters) - goto out_engines; - - bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap), - GFP_KERNEL); - if (!bitmap) - goto out_waiters; - - for (n = 0; n < count; n++) { - intel_wait_init_for_seqno(&waiters[n], n + seqno_bias); - intel_engine_add_wait(engine, &waiters[n]); - __set_bit(n, bitmap); - } - err = check_rbtree(engine, bitmap, waiters, count); - if (err) - goto out_bitmap; - - /* On each step, we advance the seqno so that several waiters are then - * complete (we increase the seqno by increasingly larger values to - * retire more and more waiters at once). All retired waiters should - * be woken and removed from the rbtree, and so that we check. 
- */ - for (n = 0; n < count; n = m) { - int seqno = 2 * n; - - GEM_BUG_ON(find_first_bit(bitmap, count) != n); - - if (intel_wait_complete(&waiters[n])) { - pr_err("waiter[%d, seqno=%d] completed too early\n", - n, waiters[n].seqno); - err = -EINVAL; - goto out_bitmap; - } - - /* complete the following waiters */ - mock_seqno_advance(engine, seqno + seqno_bias); - for (m = n; m <= seqno; m++) { - if (m == count) - break; - - GEM_BUG_ON(!test_bit(m, bitmap)); - __clear_bit(m, bitmap); - } - - intel_engine_remove_wait(engine, &waiters[n]); - RB_CLEAR_NODE(&waiters[n].node); - - err = check_rbtree(engine, bitmap, waiters, count); - if (err) { - pr_err("rbtree corrupt after seqno advance to %d\n", - seqno + seqno_bias); - goto out_bitmap; - } - - err = check_completion(engine, bitmap, waiters, count); - if (err) { - pr_err("completions after seqno advance to %d failed\n", - seqno + seqno_bias); - goto out_bitmap; - } - } - - err = check_rbtree_empty(engine); -out_bitmap: - kfree(bitmap); -out_waiters: - kvfree(waiters); -out_engines: - mock_engine_flush(engine); - return err; -} - -struct igt_wakeup { - struct task_struct *tsk; - atomic_t *ready, *set, *done; - struct intel_engine_cs *engine; - unsigned long flags; -#define STOP 0 -#define IDLE 1 - wait_queue_head_t *wq; - u32 seqno; -}; - -static bool wait_for_ready(struct igt_wakeup *w) -{ - DEFINE_WAIT(ready); - - set_bit(IDLE, &w->flags); - if (atomic_dec_and_test(w->done)) - wake_up_var(w->done); - - if (test_bit(STOP, &w->flags)) - goto out; - - for (;;) { - prepare_to_wait(w->wq, &ready, TASK_INTERRUPTIBLE); - if (atomic_read(w->ready) == 0) - break; - - schedule(); - } - finish_wait(w->wq, &ready); - -out: - clear_bit(IDLE, &w->flags); - if (atomic_dec_and_test(w->set)) - wake_up_var(w->set); - - return !test_bit(STOP, &w->flags); -} - -static int igt_wakeup_thread(void *arg) -{ - struct igt_wakeup *w = arg; - struct intel_wait wait; - - while (wait_for_ready(w)) { - GEM_BUG_ON(kthread_should_stop()); - - intel_wait_init_for_seqno(&wait, w->seqno); - intel_engine_add_wait(w->engine, &wait); - for (;;) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (i915_seqno_passed(intel_engine_get_seqno(w->engine), - w->seqno)) - break; - - if (test_bit(STOP, &w->flags)) /* emergency escape */ - break; - - schedule(); - } - intel_engine_remove_wait(w->engine, &wait); - __set_current_state(TASK_RUNNING); - } - - return 0; -} - -static void igt_wake_all_sync(atomic_t *ready, - atomic_t *set, - atomic_t *done, - wait_queue_head_t *wq, - int count) -{ - atomic_set(set, count); - atomic_set(ready, 0); - wake_up_all(wq); - - wait_var_event(set, !atomic_read(set)); - atomic_set(ready, count); - atomic_set(done, count); -} - -static int igt_wakeup(void *arg) -{ - I915_RND_STATE(prng); - struct intel_engine_cs *engine = arg; - struct igt_wakeup *waiters; - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); - const int count = 4096; - const u32 max_seqno = count / 4; - atomic_t ready, set, done; - int err = -ENOMEM; - int n, step; - - mock_engine_reset(engine); - - waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL); - if (!waiters) - goto out_engines; - - /* Create a large number of threads, each waiting on a random seqno. - * Multiple waiters will be waiting for the same seqno. 
- */ - atomic_set(&ready, count); - for (n = 0; n < count; n++) { - waiters[n].wq = &wq; - waiters[n].ready = &ready; - waiters[n].set = &set; - waiters[n].done = &done; - waiters[n].engine = engine; - waiters[n].flags = BIT(IDLE); - - waiters[n].tsk = kthread_run(igt_wakeup_thread, &waiters[n], - "i915/igt:%d", n); - if (IS_ERR(waiters[n].tsk)) - goto out_waiters; - - get_task_struct(waiters[n].tsk); - } - - for (step = 1; step <= max_seqno; step <<= 1) { - u32 seqno; - - /* The waiter threads start paused as we assign them a random - * seqno and reset the engine. Once the engine is reset, - * we signal that the threads may begin their wait upon their - * seqno. - */ - for (n = 0; n < count; n++) { - GEM_BUG_ON(!test_bit(IDLE, &waiters[n].flags)); - waiters[n].seqno = - 1 + prandom_u32_state(&prng) % max_seqno; - } - mock_seqno_advance(engine, 0); - igt_wake_all_sync(&ready, &set, &done, &wq, count); - - /* Simulate the GPU doing chunks of work, with one or more - * seqno appearing to finish at the same time. A random number - * of threads will be waiting upon the update and hopefully be - * woken. - */ - for (seqno = 1; seqno <= max_seqno + step; seqno += step) { - usleep_range(50, 500); - mock_seqno_advance(engine, seqno); - } - GEM_BUG_ON(intel_engine_get_seqno(engine) < 1 + max_seqno); - - /* With the seqno now beyond any of the waiting threads, they - * should all be woken, see that they are complete and signal - * that they are ready for the next test. We wait until all - * threads are complete and waiting for us (i.e. not a seqno). - */ - if (!wait_var_event_timeout(&done, - !atomic_read(&done), 10 * HZ)) { - pr_err("Timed out waiting for %d remaining waiters\n", - atomic_read(&done)); - err = -ETIMEDOUT; - break; - } - - err = check_rbtree_empty(engine); - if (err) - break; - } - -out_waiters: - for (n = 0; n < count; n++) { - if (IS_ERR(waiters[n].tsk)) - break; - - set_bit(STOP, &waiters[n].flags); - } - mock_seqno_advance(engine, INT_MAX); /* wakeup any broken waiters */ - igt_wake_all_sync(&ready, &set, &done, &wq, n); - - for (n = 0; n < count; n++) { - if (IS_ERR(waiters[n].tsk)) - break; - - kthread_stop(waiters[n].tsk); - put_task_struct(waiters[n].tsk); - } - - kvfree(waiters); -out_engines: - mock_engine_flush(engine); - return err; -} - -int intel_breadcrumbs_mock_selftests(void) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_random_insert_remove), - SUBTEST(igt_insert_complete), - SUBTEST(igt_wakeup), - }; - struct drm_i915_private *i915; - int err; - - i915 = mock_gem_device(); - if (!i915) - return -ENOMEM; - - err = i915_subtests(tests, i915->engine[RCS]); - drm_dev_put(&i915->drm); - - return err; -} diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 2c38ea5892d9..7b6f3bea9ef8 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -1127,7 +1127,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, wait_for_completion(&arg.completion); - if (wait_for(waitqueue_active(&rq->execute), 10)) { + if (wait_for(!list_empty(&rq->fence.cb_list), 10)) { struct drm_printer p = drm_info_printer(i915->drm.dev); pr_err("igt/evict_vma kthread did not wait\n"); diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c index b26f07b55d86..2bfa72c1654b 100644 --- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c @@ -76,3 
+76,57 @@ void timed_fence_fini(struct timed_fence *tf) destroy_timer_on_stack(&tf->timer); i915_sw_fence_fini(&tf->fence); } + +struct heap_fence { + struct i915_sw_fence fence; + union { + struct kref ref; + struct rcu_head rcu; + }; +}; + +static int __i915_sw_fence_call +heap_fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) +{ + struct heap_fence *h = container_of(fence, typeof(*h), fence); + + switch (state) { + case FENCE_COMPLETE: + break; + + case FENCE_FREE: + heap_fence_put(&h->fence); + } + + return NOTIFY_DONE; +} + +struct i915_sw_fence *heap_fence_create(gfp_t gfp) +{ + struct heap_fence *h; + + h = kmalloc(sizeof(*h), gfp); + if (!h) + return NULL; + + i915_sw_fence_init(&h->fence, heap_fence_notify); + refcount_set(&h->ref.refcount, 2); + + return &h->fence; +} + +static void heap_fence_release(struct kref *ref) +{ + struct heap_fence *h = container_of(ref, typeof(*h), ref); + + i915_sw_fence_fini(&h->fence); + + kfree_rcu(h, rcu); +} + +void heap_fence_put(struct i915_sw_fence *fence) +{ + struct heap_fence *h = container_of(fence, typeof(*h), fence); + + kref_put(&h->ref, heap_fence_release); +} diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.h b/drivers/gpu/drm/i915/selftests/lib_sw_fence.h index 474aafb92ae1..1f9927e10f3a 100644 --- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.h +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.h @@ -39,4 +39,7 @@ struct timed_fence { void timed_fence_init(struct timed_fence *tf, unsigned long expires); void timed_fence_fini(struct timed_fence *tf); +struct i915_sw_fence *heap_fence_create(gfp_t gfp); +void heap_fence_put(struct i915_sw_fence *fence); + #endif /* _LIB_SW_FENCE_H_ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 3b226ebc6bc4..08f0cab02e0f 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -86,17 +86,21 @@ static struct mock_request *first_request(struct mock_engine *engine) static void advance(struct mock_request *request) { list_del_init(&request->link); - mock_seqno_advance(request->base.engine, request->base.global_seqno); + intel_engine_write_global_seqno(request->base.engine, + request->base.global_seqno); i915_request_mark_complete(&request->base); GEM_BUG_ON(!i915_request_completed(&request->base)); + + intel_engine_queue_breadcrumbs(request->base.engine); } static void hw_delay_complete(struct timer_list *t) { struct mock_engine *engine = from_timer(engine, t, hw_delay); struct mock_request *request; + unsigned long flags; - spin_lock(&engine->hw_lock); + spin_lock_irqsave(&engine->hw_lock, flags); /* Timer fired, first request is complete */ request = first_request(engine); @@ -116,7 +120,7 @@ static void hw_delay_complete(struct timer_list *t) advance(request); } - spin_unlock(&engine->hw_lock); + spin_unlock_irqrestore(&engine->hw_lock, flags); } static void mock_context_unpin(struct intel_context *ce) @@ -191,11 +195,12 @@ static void mock_submit_request(struct i915_request *request) struct mock_request *mock = container_of(request, typeof(*mock), base); struct mock_engine *engine = container_of(request->engine, typeof(*engine), base); + unsigned long flags; i915_request_submit(request); GEM_BUG_ON(!request->global_seqno); - spin_lock_irq(&engine->hw_lock); + spin_lock_irqsave(&engine->hw_lock, flags); list_add_tail(&mock->link, &engine->hw_queue); if (mock->link.prev == &engine->hw_queue) { if (mock->delay) @@ -203,7 +208,7 @@ static void 
mock_submit_request(struct i915_request *request) else advance(mock); } - spin_unlock_irq(&engine->hw_lock); + spin_unlock_irqrestore(&engine->hw_lock, flags); } struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, @@ -273,7 +278,7 @@ void mock_engine_flush(struct intel_engine_cs *engine) void mock_engine_reset(struct intel_engine_cs *engine) { - intel_write_status_page(engine, I915_GEM_HWS_INDEX, 0); + intel_engine_write_global_seqno(engine, 0); } void mock_engine_free(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.h b/drivers/gpu/drm/i915/selftests/mock_engine.h index 133d0c21790d..b9cc3a245f16 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.h +++ b/drivers/gpu/drm/i915/selftests/mock_engine.h @@ -46,10 +46,4 @@ void mock_engine_flush(struct intel_engine_cs *engine); void mock_engine_reset(struct intel_engine_cs *engine); void mock_engine_free(struct intel_engine_cs *engine); -static inline void mock_seqno_advance(struct intel_engine_cs *engine, u32 seqno) -{ - intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); - intel_engine_wakeup(engine); -} - #endif /* !__MOCK_ENGINE_H__ */ From 789659f4307abc1c2356db09eeefefd3f445c12d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 29 Jan 2019 20:52:30 +0000 Subject: [PATCH 60/89] drm/i915: Drop fake breadcrumb irq Missed breadcrumb detection is defunct due to the tight coupling with dma_fence signaling and the myriad ways we may signal fences from everywhere but from an interrupt, i.e. we frequently signal a fence before we even see its interrupt. This means that even if we miss an interrupt for a fence, it still is signaled before our breadcrumb hangcheck fires, so simplify the breadcrumb hangchecking by moving it into the GPU hangcheck and forgo fake interrupts. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190129205230.19056-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 93 ----------- drivers/gpu/drm/i915/i915_gpu_error.c | 2 - drivers/gpu/drm/i915/i915_gpu_error.h | 5 - drivers/gpu/drm/i915/intel_breadcrumbs.c | 147 +----------------- drivers/gpu/drm/i915/intel_hangcheck.c | 2 + drivers/gpu/drm/i915/intel_ringbuffer.h | 5 - .../gpu/drm/i915/selftests/igt_live_test.c | 7 - 7 files changed, 5 insertions(+), 256 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 29d52304c189..bf3073e63af8 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1321,9 +1321,6 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) intel_engine_last_submit(engine), jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); - seq_printf(m, "\tfake irq active? 
%s\n", - yesno(test_bit(engine->id, - &dev_priv->gpu_error.missed_irq_rings))); seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n", (long long)engine->hangcheck.acthd, @@ -3899,94 +3896,6 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops, i915_wedged_get, i915_wedged_set, "%llu\n"); -static int -fault_irq_set(struct drm_i915_private *i915, - unsigned long *irq, - unsigned long val) -{ - int err; - - err = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (err) - return err; - - err = i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED | - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - if (err) - goto err_unlock; - - *irq = val; - mutex_unlock(&i915->drm.struct_mutex); - - /* Flush idle worker to disarm irq */ - drain_delayed_work(&i915->gt.idle_work); - - return 0; - -err_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; -} - -static int -i915_ring_missed_irq_get(void *data, u64 *val) -{ - struct drm_i915_private *dev_priv = data; - - *val = dev_priv->gpu_error.missed_irq_rings; - return 0; -} - -static int -i915_ring_missed_irq_set(void *data, u64 val) -{ - struct drm_i915_private *i915 = data; - - return fault_irq_set(i915, &i915->gpu_error.missed_irq_rings, val); -} - -DEFINE_SIMPLE_ATTRIBUTE(i915_ring_missed_irq_fops, - i915_ring_missed_irq_get, i915_ring_missed_irq_set, - "0x%08llx\n"); - -static int -i915_ring_test_irq_get(void *data, u64 *val) -{ - struct drm_i915_private *dev_priv = data; - - *val = dev_priv->gpu_error.test_irq_rings; - - return 0; -} - -static int -i915_ring_test_irq_set(void *data, u64 val) -{ - struct drm_i915_private *i915 = data; - - /* GuC keeps the user interrupt permanently enabled for submission */ - if (USES_GUC_SUBMISSION(i915)) - return -ENODEV; - - /* - * From icl, we can no longer individually mask interrupt generation - * from each engine. 
- */ - if (INTEL_GEN(i915) >= 11) - return -ENODEV; - - val &= INTEL_INFO(i915)->ring_mask; - DRM_DEBUG_DRIVER("Masking interrupts on rings 0x%08llx\n", val); - - return fault_irq_set(i915, &i915->gpu_error.test_irq_rings, val); -} - -DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops, - i915_ring_test_irq_get, i915_ring_test_irq_set, - "0x%08llx\n"); - #define DROP_UNBOUND BIT(0) #define DROP_BOUND BIT(1) #define DROP_RETIRE BIT(2) @@ -4750,8 +4659,6 @@ static const struct i915_debugfs_files { } i915_debugfs_files[] = { {"i915_wedged", &i915_wedged_fops}, {"i915_cache_sharing", &i915_cache_sharing_fops}, - {"i915_ring_missed_irq", &i915_ring_missed_irq_fops}, - {"i915_ring_test_irq", &i915_ring_test_irq_fops}, {"i915_gem_drop_caches", &i915_drop_caches_fops}, #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) {"i915_error_state", &i915_error_state_fops}, diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 304a7ef7f7fb..6e2e5ed2bd0a 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -723,8 +723,6 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake); err_printf(m, "DERRMR: 0x%08x\n", error->derrmr); err_printf(m, "CCID: 0x%08x\n", error->ccid); - err_printf(m, "Missed interrupts: 0x%08lx\n", - m->i915->gpu_error.missed_irq_rings); for (i = 0; i < error->nfence; i++) err_printf(m, " fence[%d] = %08llx\n", i, error->fence[i]); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 74757c424aab..53b1f22dd365 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -204,8 +204,6 @@ struct i915_gpu_error { atomic_t pending_fb_pin; - unsigned long missed_irq_rings; - /** * State variable controlling the reset flow and count * @@ -274,9 +272,6 @@ struct i915_gpu_error { */ wait_queue_head_t reset_queue; - /* For missed irq/seqno simulation. */ - unsigned long test_irq_rings; - struct i915_gpu_restart *restart; }; diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index b0795b0ad227..cacaa1d04d17 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -91,7 +91,6 @@ bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) spin_lock(&b->irq_lock); - b->irq_fired = true; if (b->irq_armed && list_empty(&b->signalers)) __intel_breadcrumbs_disarm_irq(b); @@ -172,86 +171,6 @@ static void signal_irq_work(struct irq_work *work) intel_engine_breadcrumbs_irq(engine); } -static unsigned long wait_timeout(void) -{ - return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES); -} - -static noinline void missed_breadcrumb(struct intel_engine_cs *engine) -{ - if (GEM_SHOW_DEBUG()) { - struct drm_printer p = drm_debug_printer(__func__); - - intel_engine_dump(engine, &p, - "%s missed breadcrumb at %pS\n", - engine->name, __builtin_return_address(0)); - } - - set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); -} - -static void intel_breadcrumbs_hangcheck(struct timer_list *t) -{ - struct intel_engine_cs *engine = - from_timer(engine, t, breadcrumbs.hangcheck); - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - if (!b->irq_armed) - return; - - if (b->irq_fired) - goto rearm; - - /* - * We keep the hangcheck timer alive until we disarm the irq, even - * if there are no waiters at present. 
- * - * If the waiter was currently running, assume it hasn't had a chance - * to process the pending interrupt (e.g, low priority task on a loaded - * system) and wait until it sleeps before declaring a missed interrupt. - * - * If the waiter was asleep (and not even pending a wakeup), then we - * must have missed an interrupt as the GPU has stopped advancing - * but we still have a waiter. Assuming all batches complete within - * DRM_I915_HANGCHECK_JIFFIES [1.5s]! - */ - synchronize_hardirq(engine->i915->drm.irq); - if (intel_engine_signal_breadcrumbs(engine)) { - missed_breadcrumb(engine); - mod_timer(&b->fake_irq, jiffies + 1); - } else { -rearm: - b->irq_fired = false; - mod_timer(&b->hangcheck, wait_timeout()); - } -} - -static void intel_breadcrumbs_fake_irq(struct timer_list *t) -{ - struct intel_engine_cs *engine = - from_timer(engine, t, breadcrumbs.fake_irq); - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - /* - * The timer persists in case we cannot enable interrupts, - * or if we have previously seen seqno/interrupt incoherency - * ("missed interrupt" syndrome, better known as a "missed breadcrumb"). - * Here the worker will wake up every jiffie in order to kick the - * oldest waiter to do the coherent seqno check. - */ - - if (!intel_engine_signal_breadcrumbs(engine) && !b->irq_armed) - return; - - /* If the user has disabled the fake-irq, restore the hangchecking */ - if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) { - mod_timer(&b->hangcheck, wait_timeout()); - return; - } - - mod_timer(&b->fake_irq, jiffies + 1); -} - void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; @@ -274,43 +193,14 @@ void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine) spin_unlock_irq(&b->irq_lock); } -static bool use_fake_irq(const struct intel_breadcrumbs *b) -{ - const struct intel_engine_cs *engine = - container_of(b, struct intel_engine_cs, breadcrumbs); - - if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) - return false; - - /* - * Only start with the heavy weight fake irq timer if we have not - * seen any interrupts since enabling it the first time. If the - * interrupts are still arriving, it means we made a mistake in our - * engine->seqno_barrier(), a timing error that should be transient - * and unlikely to reoccur. - */ - return !b->irq_fired; -} - -static void enable_fake_irq(struct intel_breadcrumbs *b) -{ - /* Ensure we never sleep indefinitely */ - if (!b->irq_enabled || use_fake_irq(b)) - mod_timer(&b->fake_irq, jiffies + 1); - else - mod_timer(&b->hangcheck, wait_timeout()); -} - -static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) +static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) { struct intel_engine_cs *engine = container_of(b, struct intel_engine_cs, breadcrumbs); - struct drm_i915_private *i915 = engine->i915; - bool enabled; lockdep_assert_held(&b->irq_lock); if (b->irq_armed) - return false; + return; /* * The breadcrumb irq will be disarmed on the interrupt after the @@ -328,16 +218,8 @@ static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) * the driver is idle) we disarm the breadcrumbs. */ - /* No interrupts? Kick the waiter every jiffie! 
*/ - enabled = false; - if (!b->irq_enabled++ && - !test_bit(engine->id, &i915->gpu_error.test_irq_rings)) { + if (!b->irq_enabled++) irq_enable(engine); - enabled = true; - } - - enable_fake_irq(b); - return enabled; } void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) @@ -348,18 +230,6 @@ void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) INIT_LIST_HEAD(&b->signalers); init_irq_work(&b->irq_work, signal_irq_work); - - timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0); - timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0); -} - -static void cancel_fake_irq(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - del_timer_sync(&b->fake_irq); /* may queue b->hangcheck */ - del_timer_sync(&b->hangcheck); - clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); } void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) @@ -369,13 +239,6 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) spin_lock_irqsave(&b->irq_lock, flags); - /* - * Leave the fake_irq timer enabled (if it is running), but clear the - * bit so that it turns itself off on its next wake up and goes back - * to the long hangcheck interval if still required. - */ - clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); - if (b->irq_enabled) irq_enable(engine); else @@ -386,7 +249,6 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) { - cancel_fake_irq(engine); } bool i915_request_enable_breadcrumb(struct i915_request *rq) @@ -482,7 +344,4 @@ void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, } } spin_unlock_irq(&b->irq_lock); - - if (test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) - drm_printf(p, "Fake irq active\n"); } diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index 5662d6fed523..a219c796e56d 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -275,6 +275,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work) for_each_engine(engine, dev_priv, id) { struct hangcheck hc; + intel_engine_signal_breadcrumbs(engine); + hangcheck_load_sample(engine, &hc); hangcheck_accumulate_sample(engine, &hc); hangcheck_store_sample(engine, &hc); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 71f8ceb937ff..34d0a148e664 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -387,14 +387,9 @@ struct intel_engine_cs { struct irq_work irq_work; /* for use from inside irq_lock */ - struct timer_list fake_irq; /* used after a missed interrupt */ - struct timer_list hangcheck; /* detect missed interrupts */ - - unsigned int hangcheck_interrupts; unsigned int irq_enabled; bool irq_armed; - bool irq_fired; } breadcrumbs; struct { diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index 5deb485fb942..3e902761cd16 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -35,7 +35,6 @@ int igt_live_test_begin(struct igt_live_test *t, return err; } - i915->gpu_error.missed_irq_rings = 0; t->reset_global = i915_reset_count(&i915->gpu_error); for_each_engine(engine, i915, id) @@ -75,11 +74,5 @@ int igt_live_test_end(struct igt_live_test *t) return -EIO; } - if 
(i915->gpu_error.missed_irq_rings) { - pr_err("%s(%s): Missed interrupts on engines %lx\n", - t->func, t->name, i915->gpu_error.missed_irq_rings); - return -EIO; - } - return 0; } From 584fca111d0cf32f6002472d9cb13c4555587438 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 25 Jan 2019 14:24:41 -0800 Subject: [PATCH 61/89] drm/i915/icl: use tc_port in MG_PLL macros Fix the TODO leftover in the code by changing the argument in MG_PLL macros. The MG_PLL ids used to access the register values can be converted from tc_port rather than port. All these registers can use the TC port to calculate the right offsets because they are only available for TC ports. The range (PORT_C onwards) may not be stable and change from platform to platform. So by using the TC id directly we avoid having to check for the platform in the "leaf functions" and thus passing dev_priv around. The helper functions were also renamed to use "tc" as prefix to make them more generic. v2: Improve commit message and fix checkpatch warning (from Paulo) Signed-off-by: Lucas De Marchi Reviewed-by: Paulo Zanoni Link: https://patchwork.freedesktop.org/patch/msgid/20190125222444.19926-2-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 52 +++++++++--------- drivers/gpu/drm/i915/intel_ddi.c | 7 ++- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_dpll_mgr.c | 79 +++++++++++++-------------- drivers/gpu/drm/i915/intel_dpll_mgr.h | 2 +- 5 files changed, 71 insertions(+), 71 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 1eca166d95bb..03adcf3838de 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -9554,7 +9554,7 @@ enum skl_power_gate { #define _MG_PLL3_ENABLE 0x46038 #define _MG_PLL4_ENABLE 0x4603C /* Bits are the same as DPLL0_ENABLE */ -#define MG_PLL_ENABLE(port) _MMIO_PORT((port) - PORT_C, _MG_PLL1_ENABLE, \ +#define MG_PLL_ENABLE(tc_port) _MMIO_PORT((tc_port), _MG_PLL1_ENABLE, \ _MG_PLL2_ENABLE) #define _MG_REFCLKIN_CTL_PORT1 0x16892C @@ -9563,9 +9563,9 @@ enum skl_power_gate { #define _MG_REFCLKIN_CTL_PORT4 0x16B92C #define MG_REFCLKIN_CTL_OD_2_MUX(x) ((x) << 8) #define MG_REFCLKIN_CTL_OD_2_MUX_MASK (0x7 << 8) -#define MG_REFCLKIN_CTL(port) _MMIO_PORT((port) - PORT_C, \ - _MG_REFCLKIN_CTL_PORT1, \ - _MG_REFCLKIN_CTL_PORT2) +#define MG_REFCLKIN_CTL(tc_port) _MMIO_PORT((tc_port), \ + _MG_REFCLKIN_CTL_PORT1, \ + _MG_REFCLKIN_CTL_PORT2) #define _MG_CLKTOP2_CORECLKCTL1_PORT1 0x1688D8 #define _MG_CLKTOP2_CORECLKCTL1_PORT2 0x1698D8 @@ -9575,9 +9575,9 @@ enum skl_power_gate { #define MG_CLKTOP2_CORECLKCTL1_B_DIVRATIO_MASK (0xff << 16) #define MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO(x) ((x) << 8) #define MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK (0xff << 8) -#define MG_CLKTOP2_CORECLKCTL1(port) _MMIO_PORT((port) - PORT_C, \ - _MG_CLKTOP2_CORECLKCTL1_PORT1, \ - _MG_CLKTOP2_CORECLKCTL1_PORT2) +#define MG_CLKTOP2_CORECLKCTL1(tc_port) _MMIO_PORT((tc_port), \ + _MG_CLKTOP2_CORECLKCTL1_PORT1, \ + _MG_CLKTOP2_CORECLKCTL1_PORT2) #define _MG_CLKTOP2_HSCLKCTL_PORT1 0x1688D4 #define _MG_CLKTOP2_HSCLKCTL_PORT2 0x1698D4 @@ -9595,9 +9595,9 @@ enum skl_power_gate { #define MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO(x) ((x) << 8) #define MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_SHIFT 8 #define MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK (0xf << 8) -#define MG_CLKTOP2_HSCLKCTL(port) _MMIO_PORT((port) - PORT_C, \ - _MG_CLKTOP2_HSCLKCTL_PORT1, \ - _MG_CLKTOP2_HSCLKCTL_PORT2) +#define MG_CLKTOP2_HSCLKCTL(tc_port) _MMIO_PORT((tc_port), \ + _MG_CLKTOP2_HSCLKCTL_PORT1, 
\ + _MG_CLKTOP2_HSCLKCTL_PORT2) #define _MG_PLL_DIV0_PORT1 0x168A00 #define _MG_PLL_DIV0_PORT2 0x169A00 @@ -9609,8 +9609,8 @@ enum skl_power_gate { #define MG_PLL_DIV0_FBDIV_FRAC(x) ((x) << 8) #define MG_PLL_DIV0_FBDIV_INT_MASK (0xff << 0) #define MG_PLL_DIV0_FBDIV_INT(x) ((x) << 0) -#define MG_PLL_DIV0(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_DIV0_PORT1, \ - _MG_PLL_DIV0_PORT2) +#define MG_PLL_DIV0(tc_port) _MMIO_PORT((tc_port), _MG_PLL_DIV0_PORT1, \ + _MG_PLL_DIV0_PORT2) #define _MG_PLL_DIV1_PORT1 0x168A04 #define _MG_PLL_DIV1_PORT2 0x169A04 @@ -9624,8 +9624,8 @@ enum skl_power_gate { #define MG_PLL_DIV1_NDIVRATIO(x) ((x) << 4) #define MG_PLL_DIV1_FBPREDIV_MASK (0xf << 0) #define MG_PLL_DIV1_FBPREDIV(x) ((x) << 0) -#define MG_PLL_DIV1(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_DIV1_PORT1, \ - _MG_PLL_DIV1_PORT2) +#define MG_PLL_DIV1(tc_port) _MMIO_PORT((tc_port), _MG_PLL_DIV1_PORT1, \ + _MG_PLL_DIV1_PORT2) #define _MG_PLL_LF_PORT1 0x168A08 #define _MG_PLL_LF_PORT2 0x169A08 @@ -9637,8 +9637,8 @@ enum skl_power_gate { #define MG_PLL_LF_GAINCTRL(x) ((x) << 16) #define MG_PLL_LF_INT_COEFF(x) ((x) << 8) #define MG_PLL_LF_PROP_COEFF(x) ((x) << 0) -#define MG_PLL_LF(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_LF_PORT1, \ - _MG_PLL_LF_PORT2) +#define MG_PLL_LF(tc_port) _MMIO_PORT((tc_port), _MG_PLL_LF_PORT1, \ + _MG_PLL_LF_PORT2) #define _MG_PLL_FRAC_LOCK_PORT1 0x168A0C #define _MG_PLL_FRAC_LOCK_PORT2 0x169A0C @@ -9650,9 +9650,9 @@ enum skl_power_gate { #define MG_PLL_FRAC_LOCK_DCODITHEREN (1 << 10) #define MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN (1 << 8) #define MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(x) ((x) << 0) -#define MG_PLL_FRAC_LOCK(port) _MMIO_PORT((port) - PORT_C, \ - _MG_PLL_FRAC_LOCK_PORT1, \ - _MG_PLL_FRAC_LOCK_PORT2) +#define MG_PLL_FRAC_LOCK(tc_port) _MMIO_PORT((tc_port), \ + _MG_PLL_FRAC_LOCK_PORT1, \ + _MG_PLL_FRAC_LOCK_PORT2) #define _MG_PLL_SSC_PORT1 0x168A10 #define _MG_PLL_SSC_PORT2 0x169A10 @@ -9664,8 +9664,8 @@ enum skl_power_gate { #define MG_PLL_SSC_STEPNUM(x) ((x) << 10) #define MG_PLL_SSC_FLLEN (1 << 9) #define MG_PLL_SSC_STEPSIZE(x) ((x) << 0) -#define MG_PLL_SSC(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_SSC_PORT1, \ - _MG_PLL_SSC_PORT2) +#define MG_PLL_SSC(tc_port) _MMIO_PORT((tc_port), _MG_PLL_SSC_PORT1, \ + _MG_PLL_SSC_PORT2) #define _MG_PLL_BIAS_PORT1 0x168A14 #define _MG_PLL_BIAS_PORT2 0x169A14 @@ -9684,8 +9684,8 @@ enum skl_power_gate { #define MG_PLL_BIAS_VREF_RDAC_MASK (0x7 << 5) #define MG_PLL_BIAS_IREFTRIM(x) ((x) << 0) #define MG_PLL_BIAS_IREFTRIM_MASK (0x1f << 0) -#define MG_PLL_BIAS(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_BIAS_PORT1, \ - _MG_PLL_BIAS_PORT2) +#define MG_PLL_BIAS(tc_port) _MMIO_PORT((tc_port), _MG_PLL_BIAS_PORT1, \ + _MG_PLL_BIAS_PORT2) #define _MG_PLL_TDC_COLDST_BIAS_PORT1 0x168A18 #define _MG_PLL_TDC_COLDST_BIAS_PORT2 0x169A18 @@ -9696,9 +9696,9 @@ enum skl_power_gate { #define MG_PLL_TDC_COLDST_COLDSTART (1 << 16) #define MG_PLL_TDC_TDCOVCCORR_EN (1 << 2) #define MG_PLL_TDC_TDCSEL(x) ((x) << 0) -#define MG_PLL_TDC_COLDST_BIAS(port) _MMIO_PORT((port) - PORT_C, \ - _MG_PLL_TDC_COLDST_BIAS_PORT1, \ - _MG_PLL_TDC_COLDST_BIAS_PORT2) +#define MG_PLL_TDC_COLDST_BIAS(tc_port) _MMIO_PORT((tc_port), \ + _MG_PLL_TDC_COLDST_BIAS_PORT1, \ + _MG_PLL_TDC_COLDST_BIAS_PORT2) #define _CNL_DPLL0_CFGCR0 0x6C000 #define _CNL_DPLL1_CFGCR0 0x6C080 diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index acd94354afc8..ae2413ec2196 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1391,16 +1391,17 @@ static int 
icl_calc_tbt_pll_link(struct drm_i915_private *dev_priv, static int icl_calc_mg_pll_link(struct drm_i915_private *dev_priv, enum port port) { + enum tc_port tc_port = intel_port_to_tc(dev_priv, port); u32 mg_pll_div0, mg_clktop_hsclkctl; u32 m1, m2_int, m2_frac, div1, div2, refclk; u64 tmp; refclk = dev_priv->cdclk.hw.ref; - mg_pll_div0 = I915_READ(MG_PLL_DIV0(port)); - mg_clktop_hsclkctl = I915_READ(MG_CLKTOP2_HSCLKCTL(port)); + mg_pll_div0 = I915_READ(MG_PLL_DIV0(tc_port)); + mg_clktop_hsclkctl = I915_READ(MG_CLKTOP2_HSCLKCTL(tc_port)); - m1 = I915_READ(MG_PLL_DIV1(port)) & MG_PLL_DIV1_FBPREDIV_MASK; + m1 = I915_READ(MG_PLL_DIV1(tc_port)) & MG_PLL_DIV1_FBPREDIV_MASK; m2_int = mg_pll_div0 & MG_PLL_DIV0_FBDIV_INT_MASK; m2_frac = (mg_pll_div0 & MG_PLL_DIV0_FRACNEN_H) ? (mg_pll_div0 & MG_PLL_DIV0_FBDIV_FRAC_MASK) >> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 734ac77c9a8e..efd1683f61bc 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9445,7 +9445,7 @@ static void icelake_get_ddi_pll(struct drm_i915_private *dev_priv, if (WARN_ON(!intel_dpll_is_combophy(id))) return; } else if (intel_port_is_tc(dev_priv, port)) { - id = icl_port_to_mg_pll_id(port); + id = icl_tc_port_to_pll_id(intel_port_to_tc(dev_priv, port)); } else { WARN(1, "Invalid port %x\n", port); return; diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 606f54dde086..211b3ffa5bed 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2639,14 +2639,14 @@ int icl_calc_dp_combo_pll_link(struct drm_i915_private *dev_priv, return link_clock; } -static enum port icl_mg_pll_id_to_port(enum intel_dpll_id id) +static enum tc_port icl_pll_id_to_tc_port(enum intel_dpll_id id) { - return id - DPLL_ID_ICL_MGPLL1 + PORT_C; + return id - DPLL_ID_ICL_MGPLL1; } -enum intel_dpll_id icl_port_to_mg_pll_id(enum port port) +enum intel_dpll_id icl_tc_port_to_pll_id(enum tc_port tc_port) { - return port - PORT_C + DPLL_ID_ICL_MGPLL1; + return tc_port + DPLL_ID_ICL_MGPLL1; } bool intel_dpll_is_combophy(enum intel_dpll_id id) @@ -2925,7 +2925,10 @@ icl_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, ret = icl_calc_dpll_state(crtc_state, encoder, clock, &pll_state); } else { - min = icl_port_to_mg_pll_id(port); + enum tc_port tc_port; + + tc_port = intel_port_to_tc(dev_priv, port); + min = icl_tc_port_to_pll_id(tc_port); max = min; ret = icl_calc_mg_pll_state(crtc_state, encoder, clock, &pll_state); @@ -2959,12 +2962,8 @@ static i915_reg_t icl_pll_id_to_enable_reg(enum intel_dpll_id id) return CNL_DPLL_ENABLE(id); else if (id == DPLL_ID_ICL_TBTPLL) return TBT_PLL_ENABLE; - else - /* - * TODO: Make MG_PLL macros use - * tc port id instead of port id - */ - return MG_PLL_ENABLE(icl_mg_pll_id_to_port(id)); + + return MG_PLL_ENABLE(icl_pll_id_to_tc_port(id)); } static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, @@ -2974,7 +2973,6 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, const enum intel_dpll_id id = pll->info->id; intel_wakeref_t wakeref; bool ret = false; - enum port port; u32 val; wakeref = intel_display_power_get_if_enabled(dev_priv, @@ -2991,32 +2989,33 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, hw_state->cfgcr0 = I915_READ(ICL_DPLL_CFGCR0(id)); hw_state->cfgcr1 = I915_READ(ICL_DPLL_CFGCR1(id)); } else { - port = icl_mg_pll_id_to_port(id); - hw_state->mg_refclkin_ctl = 
I915_READ(MG_REFCLKIN_CTL(port)); + enum tc_port tc_port = icl_pll_id_to_tc_port(id); + + hw_state->mg_refclkin_ctl = I915_READ(MG_REFCLKIN_CTL(tc_port)); hw_state->mg_refclkin_ctl &= MG_REFCLKIN_CTL_OD_2_MUX_MASK; hw_state->mg_clktop2_coreclkctl1 = - I915_READ(MG_CLKTOP2_CORECLKCTL1(port)); + I915_READ(MG_CLKTOP2_CORECLKCTL1(tc_port)); hw_state->mg_clktop2_coreclkctl1 &= MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK; hw_state->mg_clktop2_hsclkctl = - I915_READ(MG_CLKTOP2_HSCLKCTL(port)); + I915_READ(MG_CLKTOP2_HSCLKCTL(tc_port)); hw_state->mg_clktop2_hsclkctl &= MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK | MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK | MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK | MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK; - hw_state->mg_pll_div0 = I915_READ(MG_PLL_DIV0(port)); - hw_state->mg_pll_div1 = I915_READ(MG_PLL_DIV1(port)); - hw_state->mg_pll_lf = I915_READ(MG_PLL_LF(port)); - hw_state->mg_pll_frac_lock = I915_READ(MG_PLL_FRAC_LOCK(port)); - hw_state->mg_pll_ssc = I915_READ(MG_PLL_SSC(port)); + hw_state->mg_pll_div0 = I915_READ(MG_PLL_DIV0(tc_port)); + hw_state->mg_pll_div1 = I915_READ(MG_PLL_DIV1(tc_port)); + hw_state->mg_pll_lf = I915_READ(MG_PLL_LF(tc_port)); + hw_state->mg_pll_frac_lock = I915_READ(MG_PLL_FRAC_LOCK(tc_port)); + hw_state->mg_pll_ssc = I915_READ(MG_PLL_SSC(tc_port)); - hw_state->mg_pll_bias = I915_READ(MG_PLL_BIAS(port)); + hw_state->mg_pll_bias = I915_READ(MG_PLL_BIAS(tc_port)); hw_state->mg_pll_tdc_coldst_bias = - I915_READ(MG_PLL_TDC_COLDST_BIAS(port)); + I915_READ(MG_PLL_TDC_COLDST_BIAS(tc_port)); if (dev_priv->cdclk.hw.ref == 38400) { hw_state->mg_pll_tdc_coldst_bias_mask = MG_PLL_TDC_COLDST_COLDSTART; @@ -3051,7 +3050,7 @@ static void icl_mg_pll_write(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { struct intel_dpll_hw_state *hw_state = &pll->state.hw_state; - enum port port = icl_mg_pll_id_to_port(pll->info->id); + enum tc_port tc_port = icl_pll_id_to_tc_port(pll->info->id); u32 val; /* @@ -3060,41 +3059,41 @@ static void icl_mg_pll_write(struct drm_i915_private *dev_priv, * during the calc/readout phase if the mask depends on some other HW * state like refclk, see icl_calc_mg_pll_state(). 
*/ - val = I915_READ(MG_REFCLKIN_CTL(port)); + val = I915_READ(MG_REFCLKIN_CTL(tc_port)); val &= ~MG_REFCLKIN_CTL_OD_2_MUX_MASK; val |= hw_state->mg_refclkin_ctl; - I915_WRITE(MG_REFCLKIN_CTL(port), val); + I915_WRITE(MG_REFCLKIN_CTL(tc_port), val); - val = I915_READ(MG_CLKTOP2_CORECLKCTL1(port)); + val = I915_READ(MG_CLKTOP2_CORECLKCTL1(tc_port)); val &= ~MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK; val |= hw_state->mg_clktop2_coreclkctl1; - I915_WRITE(MG_CLKTOP2_CORECLKCTL1(port), val); + I915_WRITE(MG_CLKTOP2_CORECLKCTL1(tc_port), val); - val = I915_READ(MG_CLKTOP2_HSCLKCTL(port)); + val = I915_READ(MG_CLKTOP2_HSCLKCTL(tc_port)); val &= ~(MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK | MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK | MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK | MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK); val |= hw_state->mg_clktop2_hsclkctl; - I915_WRITE(MG_CLKTOP2_HSCLKCTL(port), val); + I915_WRITE(MG_CLKTOP2_HSCLKCTL(tc_port), val); - I915_WRITE(MG_PLL_DIV0(port), hw_state->mg_pll_div0); - I915_WRITE(MG_PLL_DIV1(port), hw_state->mg_pll_div1); - I915_WRITE(MG_PLL_LF(port), hw_state->mg_pll_lf); - I915_WRITE(MG_PLL_FRAC_LOCK(port), hw_state->mg_pll_frac_lock); - I915_WRITE(MG_PLL_SSC(port), hw_state->mg_pll_ssc); + I915_WRITE(MG_PLL_DIV0(tc_port), hw_state->mg_pll_div0); + I915_WRITE(MG_PLL_DIV1(tc_port), hw_state->mg_pll_div1); + I915_WRITE(MG_PLL_LF(tc_port), hw_state->mg_pll_lf); + I915_WRITE(MG_PLL_FRAC_LOCK(tc_port), hw_state->mg_pll_frac_lock); + I915_WRITE(MG_PLL_SSC(tc_port), hw_state->mg_pll_ssc); - val = I915_READ(MG_PLL_BIAS(port)); + val = I915_READ(MG_PLL_BIAS(tc_port)); val &= ~hw_state->mg_pll_bias_mask; val |= hw_state->mg_pll_bias; - I915_WRITE(MG_PLL_BIAS(port), val); + I915_WRITE(MG_PLL_BIAS(tc_port), val); - val = I915_READ(MG_PLL_TDC_COLDST_BIAS(port)); + val = I915_READ(MG_PLL_TDC_COLDST_BIAS(tc_port)); val &= ~hw_state->mg_pll_tdc_coldst_bias_mask; val |= hw_state->mg_pll_tdc_coldst_bias; - I915_WRITE(MG_PLL_TDC_COLDST_BIAS(port), val); + I915_WRITE(MG_PLL_TDC_COLDST_BIAS(tc_port), val); - POSTING_READ(MG_PLL_TDC_COLDST_BIAS(port)); + POSTING_READ(MG_PLL_TDC_COLDST_BIAS(tc_port)); } static void icl_pll_enable(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.h b/drivers/gpu/drm/i915/intel_dpll_mgr.h index e96e79413b54..40e8391a92f2 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.h @@ -344,7 +344,7 @@ void intel_dpll_dump_hw_state(struct drm_i915_private *dev_priv, int icl_calc_dp_combo_pll_link(struct drm_i915_private *dev_priv, u32 pll_id); int cnl_hdmi_pll_ref_clock(struct drm_i915_private *dev_priv); -enum intel_dpll_id icl_port_to_mg_pll_id(enum port port); +enum intel_dpll_id icl_tc_port_to_pll_id(enum tc_port tc_port); bool intel_dpll_is_combophy(enum intel_dpll_id id); #endif /* _INTEL_DPLL_MGR_H_ */ From 7a61a6dec3dfb9f2e8c39a337580a3c3036c5cdf Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 25 Jan 2019 14:24:42 -0800 Subject: [PATCH 62/89] drm/i915: always return something on DDI clock selection Even if we don't have the correct clock and get a warning, we should not skip the return. 
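To illustrate the pitfall (a hypothetical sketch, not the driver code): in a nested switch, a "break" in the inner default only leaves the inner switch, so execution resumes in the outer case's body and falls through into the next case label.

  /*
   * Illustrative only; pick() and its values are made up.
   * pick(1, 999) hits the inner default, breaks out of the
   * inner switch only, and wrongly returns case 2's value.
   */
  unsigned int pick(int id, int clock)
  {
  	switch (id) {
  	case 1:
  		switch (clock) {
  		case 100:
  			return 0xA;
  		default:
  			break;	/* exits only the inner switch */
  		}
  		/* execution resumes here and falls into case 2 */
  	case 2:
  		return 0xB;
  	}
  	return 0;
  }

Returning an explicit "none" value from the inner default avoids the fall-through.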
v2: improve commit message (from Joonas)

Fixes: 1fa11ee2d9d0 ("drm/i915/icl: start adding the TBT pll")
Cc: Paulo Zanoni
Cc: # v4.19+
Signed-off-by: Lucas De Marchi
Reviewed-by: Mika Kahola
Link: https://patchwork.freedesktop.org/patch/msgid/20190125222444.19926-3-lucas.demarchi@intel.com
---
 drivers/gpu/drm/i915/intel_ddi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index ae2413ec2196..51d4be072268 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -1021,7 +1021,7 @@ static u32 icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder,
 			return DDI_CLK_SEL_TBT_810;
 		default:
 			MISSING_CASE(clock);
-			break;
+			return DDI_CLK_SEL_NONE;
 		}
 	case DPLL_ID_ICL_MGPLL1:
 	case DPLL_ID_ICL_MGPLL2:

From 20fd2ab7be40658ded75f8a49cf649845c1e3572 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi
Date: Fri, 25 Jan 2019 14:24:43 -0800
Subject: [PATCH 63/89] drm/i915/icl: remove dpll from clk_sel

We should not pass DPLL_ID_ICL_DPLL0 or DPLL_ID_ICL_DPLL1 to this
function because the path is only taken for non-combophy ports. Let the
warning trigger if an improper value is given.

While at it, rename the function to match the register name we are
trying to program.

v2: fix typo in comment

Signed-off-by: Lucas De Marchi
Reviewed-by: Paulo Zanoni
Link: https://patchwork.freedesktop.org/patch/msgid/20190125222444.19926-4-lucas.demarchi@intel.com
---
 drivers/gpu/drm/i915/intel_ddi.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 51d4be072268..ca705546a0ab 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -995,7 +995,7 @@ static u32 hsw_pll_to_ddi_pll_sel(const struct intel_shared_dpll *pll)
 	}
 }
 
-static u32 icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder,
+static u32 icl_pll_to_ddi_clk_sel(struct intel_encoder *encoder,
 				  const struct intel_crtc_state *crtc_state)
 {
 	const struct intel_shared_dpll *pll = crtc_state->shared_dpll;
@@ -1004,10 +1004,11 @@
 	switch (id) {
 	default:
+		/*
+		 * DPLL_ID_ICL_DPLL0 and DPLL_ID_ICL_DPLL1 should not be used
+		 * here, so do warn if this gets passed in
+		 */
 		MISSING_CASE(id);
-		/* fall through */
-	case DPLL_ID_ICL_DPLL0:
-	case DPLL_ID_ICL_DPLL1:
 		return DDI_CLK_SEL_NONE;
 	case DPLL_ID_ICL_TBTPLL:
 		switch (clock) {
@@ -2869,7 +2870,7 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder,
 	if (IS_ICELAKE(dev_priv)) {
 		if (!intel_port_is_combophy(dev_priv, port))
 			I915_WRITE(DDI_CLK_SEL(port),
-				   icl_pll_to_ddi_pll_sel(encoder, crtc_state));
+				   icl_pll_to_ddi_clk_sel(encoder, crtc_state));
 	} else if (IS_CANNONLAKE(dev_priv)) {
 		/* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */
 		val = I915_READ(DPCLKA_CFGCR0);

From 5b0bd14dcc6bf0d28ad908aa1b8835667f860c84 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi
Date: Fri, 25 Jan 2019 14:24:44 -0800
Subject: [PATCH 64/89] drm/i915/icl: keep track of unused pll while looping

Instead of looping again on the range of plls, just keep track of one
unused one and use it later.
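This is the classic "remember a free slot while scanning for a match"
idiom; a minimal self-contained sketch (hypothetical slot type, not the
driver's structures):

  #include <stddef.h>

  struct slot { unsigned int crtc_mask; int key; };

  /* Prefer a slot with matching state; fall back to an unused one. */
  static struct slot *find_slot(struct slot *slots, int n, int key)
  {
  	struct slot *unused = NULL;
  	int i;

  	for (i = 0; i < n; i++) {
  		if (slots[i].crtc_mask == 0) {
  			unused = &slots[i];	/* remember for later */
  			continue;
  		}
  		if (slots[i].key == key)
  			return &slots[i];	/* matching timings */
  	}

  	return unused;	/* NULL when everything is in use */
  }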
Signed-off-by: Lucas De Marchi
Reviewed-by: Paulo Zanoni
Link: https://patchwork.freedesktop.org/patch/msgid/20190125222444.19926-5-lucas.demarchi@intel.com
---
 drivers/gpu/drm/i915/intel_dpll_mgr.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c
index 211b3ffa5bed..8f70838ac7d8 100644
--- a/drivers/gpu/drm/i915/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c
@@ -247,7 +247,7 @@ intel_find_shared_dpll(struct intel_crtc *crtc,
 		       enum intel_dpll_id range_max)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	struct intel_shared_dpll *pll;
+	struct intel_shared_dpll *pll, *unused_pll = NULL;
 	struct intel_shared_dpll_state *shared_dpll;
 	enum intel_dpll_id i;
@@ -257,8 +257,10 @@ intel_find_shared_dpll(struct intel_crtc *crtc,
 		pll = &dev_priv->shared_dplls[i];
 
 		/* Only want to check enabled timings first */
-		if (shared_dpll[i].crtc_mask == 0)
+		if (shared_dpll[i].crtc_mask == 0) {
+			unused_pll = pll;
 			continue;
+		}
 
 		if (memcmp(&crtc_state->dpll_hw_state,
 			   &shared_dpll[i].hw_state,
@@ -273,14 +275,11 @@ intel_find_shared_dpll(struct intel_crtc *crtc,
 	}
 
 	/* Ok no matching timings, maybe there's a free one? */
-	for (i = range_min; i <= range_max; i++) {
-		pll = &dev_priv->shared_dplls[i];
-		if (shared_dpll[i].crtc_mask == 0) {
-			DRM_DEBUG_KMS("[CRTC:%d:%s] allocated %s\n",
-				      crtc->base.base.id, crtc->base.name,
-				      pll->info->name);
-			return pll;
-		}
+	if (unused_pll) {
+		DRM_DEBUG_KMS("[CRTC:%d:%s] allocated %s\n",
+			      crtc->base.base.id, crtc->base.name,
+			      unused_pll->info->name);
+		return unused_pll;
 	}
 
 	return NULL;

From b52c273be6876107a9f77987d9d42cee36d0081e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?=
Date: Fri, 21 Dec 2018 19:14:28 +0200
Subject: [PATCH 65/89] drm/i915: Don't ignore level 0 lines watermark for glk+
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On glk+ the level 0 lines watermark actually matters. Do not ignore it.

And while at it let's change things so that we always program a
consistent 0 to the register when the lines watermark is ignored by
the hardware.

Signed-off-by: Ville Syrjälä
Link: https://patchwork.freedesktop.org/patch/msgid/20181221171436.8218-2-ville.syrjala@linux.intel.com
Reviewed-by: Stanislav Lisovskiy
Reviewed-by: Matt Roper
---
 drivers/gpu/drm/i915/intel_pm.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index fdc28a3d2936..e517bdd2c2f9 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4674,6 +4674,15 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate,
 	return 0;
 }
 
+static bool skl_wm_has_lines(struct drm_i915_private *dev_priv, int level)
+{
+	if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
+		return true;
+
+	/* The number of lines are ignored for the level 0 watermark. */
+	return level > 0;
+}
+
 static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,
 				 const struct intel_plane_state *intel_pstate,
 				 int level,
@@ -4756,8 +4765,10 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,
 		}
 	}
 
-	/* The number of lines are ignored for the level 0 watermark. */
-	if (level > 0 && res_lines > 31)
+	if (!skl_wm_has_lines(dev_priv, level))
+		res_lines = 0;
+
+	if (res_lines > 31)
 		return;
 
 	/*

From 692927f4e9058fbdae1baecf72d85659bbc3c59c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?=
Date: Fri, 21 Dec 2018 19:14:29 +0200
Subject: [PATCH 66/89] drm/i915: Reinstate an early latency==0 check for skl+
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I thought we could remove all the early latency==0 checks and rely on
skl_wm_method{1,2}() checking for it. But skl_compute_plane_wm()
applies a bunch of workarounds to bump up the latency before calling
those guys so clearly it won't end up doing the right thing.

Also not sure if the calculations based on the method1/2 results are
safe against overflows so it might not work all that well in any case.

Let's put the early check back.

Signed-off-by: Ville Syrjälä
Link: https://patchwork.freedesktop.org/patch/msgid/20181221171436.8218-3-ville.syrjala@linux.intel.com
Reviewed-by: Stanislav Lisovskiy
Reviewed-by: Matt Roper
---
 drivers/gpu/drm/i915/intel_pm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index e517bdd2c2f9..7473d6ea4bfe 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4700,6 +4700,9 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,
 		to_intel_atomic_state(cstate->base.state);
 	bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
 
+	if (latency == 0)
+		return;
+
 	/* Display WA #1141: kbl,cfl */
 	if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) ||
 	    IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0)) &&

From 17b16054b1115559df126c9d7e727770194d39e3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?=
Date: Fri, 21 Dec 2018 19:14:30 +0200
Subject: [PATCH 67/89] drm/i915: Fix bits vs. bytes mixup in dbuf block size
 computation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The spec used to say "8bpp" which someone took to mean 8 bytes per
pixel when in fact it was supposed to be 8 bits per pixel. The spec has
been updated to make it more clear now. Fix the code to match.

Signed-off-by: Ville Syrjälä
Link: https://patchwork.freedesktop.org/patch/msgid/20181221171436.8218-4-ville.syrjala@linux.intel.com
Reviewed-by: Stanislav Lisovskiy
Reviewed-by: Matt Roper
---
 drivers/gpu/drm/i915/intel_pm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 7473d6ea4bfe..59e186b30d97 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4617,7 +4617,7 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate,
 							 intel_pstate);
 
 	if (INTEL_GEN(dev_priv) >= 11 &&
-	    fb->modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 8)
+	    fb->modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 1)
 		wp->dbuf_block_size = 256;
 	else
 		wp->dbuf_block_size = 512;

From b19c9bcaa20ec10bc39389f2b8bfe4c57cde7cbd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?=
Date: Fri, 21 Dec 2018 19:14:31 +0200
Subject: [PATCH 68/89] drm/i915: Fix > vs >= mismatch in watermark/ddb
 calculations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bspec says we have to reject the watermark if it's >= the ddb
allocation. Fix the code to reject the == case as it should.
For transition watermarks we can just use >=, for the rest we'll do +1
when calculating the minimum ddb allocation size.

Signed-off-by: Ville Syrjälä
Link: https://patchwork.freedesktop.org/patch/msgid/20181221171436.8218-5-ville.syrjala@linux.intel.com
Reviewed-by: Matt Roper
---
 drivers/gpu/drm/i915/intel_pm.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 59e186b30d97..fff44512c44e 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4371,8 +4371,8 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
 			continue;
 
 		wm = &cstate->wm.skl.optimal.planes[plane_id];
-		blocks += wm->wm[level].plane_res_b;
-		blocks += wm->uv_wm[level].plane_res_b;
+		blocks += wm->wm[level].plane_res_b + 1;
+		blocks += wm->uv_wm[level].plane_res_b + 1;
 	}
 
 	if (blocks < alloc_size) {
@@ -4413,7 +4413,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
 		extra = min_t(u16, alloc_size,
 			      DIV64_U64_ROUND_UP(alloc_size * rate,
 						 total_data_rate));
-		total[plane_id] = wm->wm[level].plane_res_b + extra;
+		total[plane_id] = wm->wm[level].plane_res_b + 1 + extra;
 		alloc_size -= extra;
 		total_data_rate -= rate;
 
@@ -4424,7 +4424,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
 			extra = min_t(u16, alloc_size,
 				      DIV64_U64_ROUND_UP(alloc_size * rate,
 							 total_data_rate));
-			uv_total[plane_id] = wm->uv_wm[level].plane_res_b + extra;
+			uv_total[plane_id] = wm->uv_wm[level].plane_res_b + 1 + extra;
 			alloc_size -= extra;
 			total_data_rate -= rate;
 		}
@@ -4477,7 +4477,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
 	 */
 	for_each_plane_id_on_crtc(intel_crtc, plane_id) {
 		wm = &cstate->wm.skl.optimal.planes[plane_id];
-		if (wm->trans_wm.plane_res_b > total[plane_id])
+		if (wm->trans_wm.plane_res_b >= total[plane_id])
 			memset(&wm->trans_wm, 0, sizeof(wm->trans_wm));
 	}

From 961d95e09c045fe2998c74da96961edc8658f171 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?=
Date: Fri, 21 Dec 2018 19:14:32 +0200
Subject: [PATCH 69/89] drm/i915: Account for minimum ddb allocation
 restrictions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On icl+ bspec tells us to calculate a separate minimum ddb allocation
from the blocks watermark. Both have to be checked against the actual
ddb allocation, but since we do things the other way around we'll just
calculate the minimum acceptable ddb allocation by taking the maximum
of the two values.

We'll also replace the memcmp() with a full trawl over the watermarks
so that it'll ignore the min_ddb_alloc because we can't directly read
that out from the hw. I suppose we could reconstruct it from the other
values, but I was too lazy to do that now.
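As a worked illustration of that rule (a simplified sketch using plain
integer blocks per line instead of the driver's fixed-point type; the
helper name is made up):

  #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

  static unsigned int min_ddb_alloc(unsigned int res_blocks,
  				  unsigned int res_lines,
  				  unsigned int y_min_scanlines,
  				  unsigned int blocks_per_line,
  				  int y_tiled)
  {
  	unsigned int min;

  	if (y_tiled) {
  		unsigned int rem = res_lines % y_min_scanlines;
  		unsigned int extra = rem ? 2 * y_min_scanlines - rem
  					 : y_min_scanlines;

  		/* pad the lines watermark up to tile granularity */
  		min = (res_lines + extra) * blocks_per_line;
  	} else {
  		/* linear/x-tiled: blocks watermark plus 10% */
  		min = res_blocks + DIV_ROUND_UP(res_blocks, 10);
  	}

  	/* the ddb allocation must be strictly larger, hence the +1 */
  	return (min > res_blocks ? min : res_blocks) + 1;
  }

E.g. with res_lines = 10 and y_min_scanlines = 4 the extra is 6 lines,
i.e. the minimum allocation covers 16 scanlines' worth of blocks.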
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181221171436.8218-6-ville.syrjala@linux.intel.com Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 53 +++++++++++++++++++++++++++------ 2 files changed, 45 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d072f3369ee1..d1cecc588fbb 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1114,6 +1114,7 @@ struct skl_ddb_values { }; struct skl_wm_level { + u16 min_ddb_alloc; u16 plane_res_b; u8 plane_res_l; bool plane_en; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index fff44512c44e..abb018f49a0e 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4371,8 +4371,8 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, continue; wm = &cstate->wm.skl.optimal.planes[plane_id]; - blocks += wm->wm[level].plane_res_b + 1; - blocks += wm->uv_wm[level].plane_res_b + 1; + blocks += wm->wm[level].min_ddb_alloc; + blocks += wm->uv_wm[level].min_ddb_alloc; } if (blocks < alloc_size) { @@ -4413,7 +4413,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, extra = min_t(u16, alloc_size, DIV64_U64_ROUND_UP(alloc_size * rate, total_data_rate)); - total[plane_id] = wm->wm[level].plane_res_b + 1 + extra; + total[plane_id] = wm->wm[level].min_ddb_alloc + extra; alloc_size -= extra; total_data_rate -= rate; @@ -4424,7 +4424,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, extra = min_t(u16, alloc_size, DIV64_U64_ROUND_UP(alloc_size * rate, total_data_rate)); - uv_total[plane_id] = wm->uv_wm[level].plane_res_b + 1 + extra; + uv_total[plane_id] = wm->uv_wm[level].min_ddb_alloc + extra; alloc_size -= extra; total_data_rate -= rate; } @@ -4695,7 +4695,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, u32 latency = dev_priv->wm.skl_latency[level]; uint_fixed_16_16_t method1, method2; uint_fixed_16_16_t selected_result; - u32 res_blocks, res_lines; + u32 res_blocks, res_lines, min_ddb_alloc = 0; struct intel_atomic_state *state = to_intel_atomic_state(cstate->base.state); bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); @@ -4768,6 +4768,24 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, } } + if (INTEL_GEN(dev_priv) >= 11) { + if (wp->y_tiled) { + int extra_lines; + + if (res_lines % wp->y_min_scanlines == 0) + extra_lines = wp->y_min_scanlines; + else + extra_lines = wp->y_min_scanlines * 2 - + res_lines % wp->y_min_scanlines; + + min_ddb_alloc = mul_round_up_u32_fixed16(res_lines + extra_lines, + wp->plane_blocks_per_line); + } else { + min_ddb_alloc = res_blocks + + DIV_ROUND_UP(res_blocks, 10); + } + } + if (!skl_wm_has_lines(dev_priv, level)) res_lines = 0; @@ -4782,6 +4800,8 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, */ result->plane_res_b = res_blocks; result->plane_res_l = res_lines; + /* Bspec says: value >= plane ddb allocation -> invalid, hence the +1 here */ + result->min_ddb_alloc = max(min_ddb_alloc, res_blocks) + 1; result->plane_en = true; } @@ -5132,6 +5152,23 @@ static bool skl_plane_wm_equals(struct drm_i915_private *dev_priv, return skl_wm_level_equals(&wm1->trans_wm, &wm2->trans_wm); } +static bool skl_pipe_wm_equals(struct intel_crtc *crtc, + const struct skl_pipe_wm *wm1, + const struct skl_pipe_wm *wm2) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum plane_id 
plane_id; + + for_each_plane_id_on_crtc(crtc, plane_id) { + if (!skl_plane_wm_equals(dev_priv, + &wm1->planes[plane_id], + &wm2->planes[plane_id])) + return false; + } + + return wm1->linetime == wm2->linetime; +} + static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a, const struct skl_ddb_entry *b) { @@ -5158,16 +5195,14 @@ static int skl_update_pipe_wm(struct intel_crtc_state *cstate, struct skl_pipe_wm *pipe_wm, /* out */ bool *changed /* out */) { + struct intel_crtc *crtc = to_intel_crtc(cstate->base.crtc); int ret; ret = skl_build_pipe_wm(cstate, pipe_wm); if (ret) return ret; - if (!memcmp(old_pipe_wm, pipe_wm, sizeof(*pipe_wm))) - *changed = false; - else - *changed = true; + *changed = !skl_pipe_wm_equals(crtc, old_pipe_wm, pipe_wm); return 0; } From 60e983ff187f3df4a83e977c90580243f4a71dbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Dec 2018 19:14:33 +0200 Subject: [PATCH 70/89] drm/i915: Pass dev_priv to skl_needs_memory_bw_wa() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit skl_needs_memory_bw_wa() doesn't look at the passed in state at all. Possibly it should, but for now let's make life simpler by just passing in dev_priv. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181221171436.8218-7-ville.syrjala@linux.intel.com Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/intel_pm.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index abb018f49a0e..78c893dce4d1 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3631,14 +3631,9 @@ static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv) * FIXME: We still don't have the proper code detect if we need to apply the WA, * so assume we'll always need it in order to avoid underruns. 
*/ -static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) +static bool skl_needs_memory_bw_wa(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(state->base.dev); - - if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) - return true; - - return false; + return IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv); } static bool @@ -3790,7 +3785,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) latency = dev_priv->wm.skl_latency[level]; - if (skl_needs_memory_bw_wa(intel_state) && + if (skl_needs_memory_bw_wa(dev_priv) && plane->base.state->fb->modifier == I915_FORMAT_MOD_X_TILED) latency += 15; @@ -4579,9 +4574,6 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate, const struct drm_plane_state *pstate = &intel_pstate->base; const struct drm_framebuffer *fb = pstate->fb; u32 interm_pbpl; - struct intel_atomic_state *state = - to_intel_atomic_state(cstate->base.state); - bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); /* only NV12 format has two planes */ if (color_plane == 1 && fb->format->format != DRM_FORMAT_NV12) { @@ -4642,7 +4634,7 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate, wp->y_min_scanlines = 4; } - if (apply_memory_bw_wa) + if (skl_needs_memory_bw_wa(dev_priv)) wp->y_min_scanlines *= 2; wp->plane_bytes_per_line = wp->width * wp->cpp; @@ -4696,9 +4688,6 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, uint_fixed_16_16_t method1, method2; uint_fixed_16_16_t selected_result; u32 res_blocks, res_lines, min_ddb_alloc = 0; - struct intel_atomic_state *state = - to_intel_atomic_state(cstate->base.state); - bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); if (latency == 0) return; @@ -4709,7 +4698,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, dev_priv->ipc_enabled) latency += 4; - if (apply_memory_bw_wa && wp->x_tiled) + if (skl_needs_memory_bw_wa(dev_priv) && wp->x_tiled) latency += 15; method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate, From ff61a97499fd27944c02b9c8035d50d803a7975f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Dec 2018 19:14:34 +0200 Subject: [PATCH 71/89] drm/i915: Drop the definite article in front of SAGV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The spec doesn't use a definite article in front of SAGV. The rules regarding articles and initialisms are super fuzzy, but at least to my ears it sounds much more natural to not have the article. Perhaps because I tend to pronounce it as "sag-vee" instead of spelling out the letters one at a time. Actually I might still prefer to leave out the article if I did spell them out. 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181221171436.8218-8-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_pm.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 78c893dce4d1..c78674ef616f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3665,25 +3665,25 @@ intel_enable_sagv(struct drm_i915_private *dev_priv) if (dev_priv->sagv_status == I915_SAGV_ENABLED) return 0; - DRM_DEBUG_KMS("Enabling the SAGV\n"); + DRM_DEBUG_KMS("Enabling SAGV\n"); mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL, GEN9_SAGV_ENABLE); - /* We don't need to wait for the SAGV when enabling */ + /* We don't need to wait for SAGV when enabling */ mutex_unlock(&dev_priv->pcu_lock); /* * Some skl systems, pre-release machines in particular, - * don't actually have an SAGV. + * don't actually have SAGV. */ if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) { DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; return 0; } else if (ret < 0) { - DRM_ERROR("Failed to enable the SAGV\n"); + DRM_ERROR("Failed to enable SAGV\n"); return ret; } @@ -3702,7 +3702,7 @@ intel_disable_sagv(struct drm_i915_private *dev_priv) if (dev_priv->sagv_status == I915_SAGV_DISABLED) return 0; - DRM_DEBUG_KMS("Disabling the SAGV\n"); + DRM_DEBUG_KMS("Disabling SAGV\n"); mutex_lock(&dev_priv->pcu_lock); /* bspec says to keep retrying for at least 1 ms */ @@ -3714,14 +3714,14 @@ intel_disable_sagv(struct drm_i915_private *dev_priv) /* * Some skl systems, pre-release machines in particular, - * don't actually have an SAGV. + * don't actually have SAGV. */ if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) { DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; return 0; } else if (ret < 0) { - DRM_ERROR("Failed to disable the SAGV (%d)\n", ret); + DRM_ERROR("Failed to disable SAGV (%d)\n", ret); return ret; } @@ -3752,7 +3752,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) sagv_block_time_us = 10; /* - * SKL+ workaround: bspec recommends we disable the SAGV when we have + * SKL+ workaround: bspec recommends we disable SAGV when we have * more then one pipe enabled * * If there are no active CRTCs, no additional checks need be performed @@ -3793,7 +3793,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) /* * If any of the planes on this pipe don't enable wm levels that * incur memory latencies higher than sagv_block_time_us we - * can't enable the SAGV. + * can't enable SAGV. */ if (latency < sagv_block_time_us) return false; From ff58c11cdbe99aaed8759caa39952e7355d50b78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Dec 2018 19:14:35 +0200 Subject: [PATCH 72/89] drm/i915: Drop the pointless linetime==0 check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 0*whatever==0 so this check is pointless. Remove it. 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181221171436.8218-9-ville.syrjala@linux.intel.com Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/intel_pm.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c78674ef616f..1d1c56961b39 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4824,10 +4824,6 @@ skl_compute_linetime_wm(const struct intel_crtc_state *cstate) u32 linetime_wm; linetime_us = intel_get_linetime_us(cstate); - - if (is_fixed16_zero(linetime_us)) - return 0; - linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us)); /* Display WA #1135: bxt:ALL GLK:ALL */ From 717671c610fc4119ec4be87a99d8fe35e5863725 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Dec 2018 19:14:36 +0200 Subject: [PATCH 73/89] drm/i915: Use IS_GEN9_LP() for the linetime w/a check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IS_GLK||IS_BXT == IS_GEN9_LP Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181221171436.8218-10-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/intel_pm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 1d1c56961b39..53b706154c94 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4826,9 +4826,8 @@ skl_compute_linetime_wm(const struct intel_crtc_state *cstate) linetime_us = intel_get_linetime_us(cstate); linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us)); - /* Display WA #1135: bxt:ALL GLK:ALL */ - if ((IS_BROXTON(dev_priv) || IS_GEMINILAKE(dev_priv)) && - dev_priv->ipc_enabled) + /* Display WA #1135: BXT:ALL GLK:ALL */ + if (IS_GEN9_LP(dev_priv) && dev_priv->ipc_enabled) linetime_wm /= 2; return linetime_wm; From d54e5f76d8fe025fa557857d826515116d124a90 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 29 Jan 2019 17:39:13 -0800 Subject: [PATCH 74/89] drm/i915: Update DRIVER_DATE to 20190129 Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d1cecc588fbb..22da9df1f0a7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -91,8 +91,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20190124" -#define DRIVER_TIMESTAMP 1548370857 +#define DRIVER_DATE "20190129" +#define DRIVER_TIMESTAMP 1548812351 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions From c0550305fcbd82c0b0d094e385551d9c63fc8852 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 30 Jan 2019 10:51:20 -0800 Subject: [PATCH 75/89] drm/i915: Force background color to black for gen9+ (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't yet allow userspace to control the CRTC background color, but we should manually program the color to black to ensure the BIOS didn't leave us with some other color. We should also set the pipe gamma and pipe CSC bits so that the background color goes through the same color management transformations that a plane with black pixels would. 
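In effect the programmed value is just the two control bits over an
all-zero (black) color, mirroring the register definitions added below
(illustrative fragment only):

  /* color fields in the low bits left at zero, i.e. black */
  I915_WRITE(SKL_BOTTOM_COLOR(crtc->pipe),
  	   SKL_BOTTOM_COLOR_GAMMA_ENABLE |	/* bit 31 */
  	   SKL_BOTTOM_COLOR_CSC_ENABLE);	/* bit 30 */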
v2: Rename register to SKL_BOTTOM_COLOR to more closely follow bspec naming. (Ville) Cc: Ville Syrjälä Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20190130185122.10322-2-matthew.d.roper@intel.com Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/i915_reg.h | 6 ++++++ drivers/gpu/drm/i915/intel_display.c | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 03adcf3838de..a64deeb4e517 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5710,6 +5710,12 @@ enum { #define PIPEMISC_DITHER_TYPE_SP (0 << 2) #define PIPEMISC(pipe) _MMIO_PIPE2(pipe, _PIPE_MISC_A) +/* Skylake+ pipe bottom (background) color */ +#define _SKL_BOTTOM_COLOR_A 0x70034 +#define SKL_BOTTOM_COLOR_GAMMA_ENABLE (1 << 31) +#define SKL_BOTTOM_COLOR_CSC_ENABLE (1 << 30) +#define SKL_BOTTOM_COLOR(pipe) _MMIO_PIPE2(pipe, _SKL_BOTTOM_COLOR_A) + #define VLV_DPFLIPSTAT _MMIO(VLV_DISPLAY_BASE + 0x70028) #define PIPEB_LINE_COMPARE_INT_EN (1 << 29) #define PIPEB_HLINE_INT_EN (1 << 28) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index efd1683f61bc..b0bb8adcf26f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3930,6 +3930,16 @@ static void intel_update_pipe_config(const struct intel_crtc_state *old_crtc_sta else if (old_crtc_state->pch_pfit.enabled) ironlake_pfit_disable(old_crtc_state); } + + /* + * We don't (yet) allow userspace to control the pipe background color, + * so force it to black, but apply pipe gamma and CSC so that its + * handling will match how we program our planes. + */ + if (INTEL_GEN(dev_priv) >= 9) + I915_WRITE(SKL_BOTTOM_COLOR(crtc->pipe), + SKL_BOTTOM_COLOR_GAMMA_ENABLE | + SKL_BOTTOM_COLOR_CSC_ENABLE); } static void intel_fdi_normal_train(struct intel_crtc *crtc) @@ -15488,6 +15498,15 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, plane->base.type != DRM_PLANE_TYPE_PRIMARY) intel_plane_disable_noatomic(crtc, plane); } + + /* + * Disable any background color set by the BIOS, but enable the + * gamma and CSC to match how we program our planes. + */ + if (INTEL_GEN(dev_priv) >= 9) + I915_WRITE(SKL_BOTTOM_COLOR(crtc->pipe), + SKL_BOTTOM_COLOR_GAMMA_ENABLE | + SKL_BOTTOM_COLOR_CSC_ENABLE); } /* Adjust the state of the output pipe according to whether we From e4c0d5314dede34ac48257cbdd4b3a046b2415df Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 30 Jan 2019 10:10:22 -0800 Subject: [PATCH 76/89] drm/i915: Apply LUT validation checks to platforms more accurately (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use of the new DRM_COLOR_LUT_NON_DECREASING test was a bit over-zealous; it doesn't actually need to be applied to the degamma on "bdw-style" platforms. Likewise, we overlooked the fact that CHV should have that test applied to the gamma LUT as well as the degamma LUT. Rather than adding more complicated platform checking to intel_color_check(), let's just store the appropriate set of LUT validation flags for each platform in the intel_device_info structure. v2: - Shuffle around LUT size tests so that the hardware-specific tests won't be applied to legacy gamma tables. (Ville) - Add a debug message so that it will be easier to understand why an atomic transaction involving incorrectly-sized LUT's got rejected by the driver. v3: - Switch size_t's to int's. 
(Ville) Fixes: 85e2d61e4976 ("drm/i915: Validate userspace-provided color management LUT's (v4)") References: https://lists.freedesktop.org/archives/intel-gfx/2019-January/187634.html Cc: Ville Syrjälä Signed-off-by: Matt Roper Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190130181022.4291-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 10 +++- drivers/gpu/drm/i915/intel_color.c | 64 ++++++++++++------------ drivers/gpu/drm/i915/intel_device_info.h | 2 + 3 files changed, 42 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index dd4aff2b256e..6f7ad10f34b8 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -67,9 +67,15 @@ #define BDW_COLORS \ .color = { .degamma_lut_size = 512, .gamma_lut_size = 512 } #define CHV_COLORS \ - .color = { .degamma_lut_size = 65, .gamma_lut_size = 257 } + .color = { .degamma_lut_size = 65, .gamma_lut_size = 257, \ + .degamma_lut_tests = DRM_COLOR_LUT_NON_DECREASING, \ + .gamma_lut_tests = DRM_COLOR_LUT_NON_DECREASING, \ + } #define GLK_COLORS \ - .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } + .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024, \ + .degamma_lut_tests = DRM_COLOR_LUT_NON_DECREASING | \ + DRM_COLOR_LUT_EQUAL_CHANNELS, \ + } /* Keep in gen based order, and chronological order within a gen */ diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index bc7589656a8f..4b0044cdcf1a 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -605,48 +605,48 @@ void intel_color_load_luts(struct intel_crtc_state *crtc_state) dev_priv->display.load_luts(crtc_state); } +static int check_lut_size(const struct drm_property_blob *lut, int expected) +{ + int len; + + if (!lut) + return 0; + + len = drm_color_lut_size(lut); + if (len != expected) { + DRM_DEBUG_KMS("Invalid LUT size; got %d, expected %d\n", + len, expected); + return -EINVAL; + } + + return 0; +} + int intel_color_check(struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - size_t gamma_length, degamma_length; - uint32_t tests = DRM_COLOR_LUT_NON_DECREASING; + int gamma_length, degamma_length; + u32 gamma_tests, degamma_tests; degamma_length = INTEL_INFO(dev_priv)->color.degamma_lut_size; gamma_length = INTEL_INFO(dev_priv)->color.gamma_lut_size; + degamma_tests = INTEL_INFO(dev_priv)->color.degamma_lut_tests; + gamma_tests = INTEL_INFO(dev_priv)->color.gamma_lut_tests; - /* - * All of our platforms mandate that the degamma curve be - * non-decreasing. Additionally, GLK and gen11 only accept a single - * value for red, green, and blue in the degamma table. Make sure - * userspace didn't try to pass us something we can't handle. - * - * We don't have any extra hardware constraints on the gamma table, - * so no need to explicitly check it. - */ - if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10) - tests |= DRM_COLOR_LUT_EQUAL_CHANNELS; - - if (drm_color_lut_check(crtc_state->base.degamma_lut, tests) != 0) - return -EINVAL; - - /* - * We allow both degamma & gamma luts at the right size or - * NULL. 
- */ - if ((!crtc_state->base.degamma_lut || - drm_color_lut_size(crtc_state->base.degamma_lut) == degamma_length) && - (!crtc_state->base.gamma_lut || - drm_color_lut_size(crtc_state->base.gamma_lut) == gamma_length)) - return 0; - - /* - * We also allow no degamma lut/ctm and a gamma lut at the legacy - * size (256 entries). - */ + /* Always allow legacy gamma LUT with no further checking. */ if (crtc_state_is_legacy_gamma(crtc_state)) return 0; - return -EINVAL; + if (check_lut_size(crtc_state->base.degamma_lut, degamma_length) || + check_lut_size(crtc_state->base.gamma_lut, gamma_length)) + return -EINVAL; + + if (drm_color_lut_check(crtc_state->base.degamma_lut, degamma_tests) || + drm_color_lut_check(crtc_state->base.gamma_lut, gamma_tests)) + return -EINVAL; + + + return 0; } void intel_color_init(struct intel_crtc *crtc) diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 957c6527f76b..7bf09cef591a 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -189,6 +189,8 @@ struct intel_device_info { struct color_luts { u16 degamma_lut_size; u16 gamma_lut_size; + u32 degamma_lut_tests; + u32 gamma_lut_tests; } color; }; From 49220789617bdc439c95146eb94529c0fcf01043 Mon Sep 17 00:00:00 2001 From: Hang Yuan Date: Wed, 30 Jan 2019 18:25:52 +0800 Subject: [PATCH 77/89] drm/i915/gvt: add functions to get default resolution These functions will get default resolution according to vgpu type. Signed-off-by: Hang Yuan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 1 + drivers/gpu/drm/i915/gvt/display.h | 37 +++++++++++++++++++++++++----- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 4f25b6b7728e..6a86faccbe3f 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -342,6 +342,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, port->dpcd->data_valid = true; port->dpcd->data[DPCD_SINK_COUNT] = 0x1; port->type = type; + port->id = resolution; emulate_monitor_status_change(vgpu); diff --git a/drivers/gpu/drm/i915/gvt/display.h b/drivers/gpu/drm/i915/gvt/display.h index ea7c1c525b8c..a87f33e6a23c 100644 --- a/drivers/gpu/drm/i915/gvt/display.h +++ b/drivers/gpu/drm/i915/gvt/display.h @@ -146,18 +146,19 @@ enum intel_vgpu_port_type { GVT_PORT_MAX }; +enum intel_vgpu_edid { + GVT_EDID_1024_768, + GVT_EDID_1920_1200, + GVT_EDID_NUM, +}; + struct intel_vgpu_port { /* per display EDID information */ struct intel_vgpu_edid_data *edid; /* per display DPCD information */ struct intel_vgpu_dpcd_data *dpcd; int type; -}; - -enum intel_vgpu_edid { - GVT_EDID_1024_768, - GVT_EDID_1920_1200, - GVT_EDID_NUM, + enum intel_vgpu_edid id; }; static inline char *vgpu_edid_str(enum intel_vgpu_edid id) @@ -172,6 +173,30 @@ static inline char *vgpu_edid_str(enum intel_vgpu_edid id) } } +static inline unsigned int vgpu_edid_xres(enum intel_vgpu_edid id) +{ + switch (id) { + case GVT_EDID_1024_768: + return 1024; + case GVT_EDID_1920_1200: + return 1920; + default: + return 0; + } +} + +static inline unsigned int vgpu_edid_yres(enum intel_vgpu_edid id) +{ + switch (id) { + case GVT_EDID_1024_768: + return 768; + case GVT_EDID_1920_1200: + return 1200; + default: + return 0; + } +} + void intel_gvt_emulate_vblank(struct intel_gvt *gvt); void intel_gvt_check_vblank_emulation(struct intel_gvt *gvt); From 1ca20f33df42ee61a4ae1bf4f2abf8e5f77a16fc Mon 
Sep 17 00:00:00 2001
From: Hang Yuan
Date: Wed, 30 Jan 2019 18:25:53 +0800
Subject: [PATCH 78/89] drm/i915/gvt: add hotplug emulation

Add a function to emulate hotplug interrupts for SKL/KBL platforms.

Signed-off-by: Hang Yuan
Signed-off-by: Zhenyu Wang
---
 drivers/gpu/drm/i915/gvt/display.c | 30 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/gvt/gvt.c     |  1 +
 drivers/gpu/drm/i915/gvt/gvt.h     |  3 +++
 3 files changed, 34 insertions(+)

diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c
index 6a86faccbe3f..035479e273be 100644
--- a/drivers/gpu/drm/i915/gvt/display.c
+++ b/drivers/gpu/drm/i915/gvt/display.c
@@ -445,6 +445,36 @@ void intel_gvt_emulate_vblank(struct intel_gvt *gvt)
 	mutex_unlock(&gvt->lock);
 }
 
+/**
+ * intel_vgpu_emulate_hotplug - trigger hotplug event for vGPU
+ * @vgpu: a vGPU
+ * @connected: link state
+ *
+ * This function is used to trigger hotplug interrupt for vGPU
+ *
+ */
+void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected)
+{
+	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
+
+	/* TODO: add more platforms support */
+	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
+		if (connected) {
+			vgpu_vreg_t(vgpu, SFUSE_STRAP) |=
+				SFUSE_STRAP_DDID_DETECTED;
+			vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT;
+		} else {
+			vgpu_vreg_t(vgpu, SFUSE_STRAP) &=
+				~SFUSE_STRAP_DDID_DETECTED;
+			vgpu_vreg_t(vgpu, SDEISR) &= ~SDE_PORTD_HOTPLUG_CPT;
+		}
+		vgpu_vreg_t(vgpu, SDEIIR) |= SDE_PORTD_HOTPLUG_CPT;
+		vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |=
+			PORTD_HOTPLUG_STATUS_MASK;
+		intel_vgpu_trigger_virtual_event(vgpu, DP_D_HOTPLUG);
+	}
+}
+
 /**
  * intel_vgpu_clean_display - clean vGPU virtual display emulation
  * @vgpu: a vGPU
diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c
index 4e8947f33bd0..43f4242062dd 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.c
+++ b/drivers/gpu/drm/i915/gvt/gvt.c
@@ -185,6 +185,7 @@ static const struct intel_gvt_ops intel_gvt_ops = {
 	.vgpu_query_plane = intel_vgpu_query_plane,
 	.vgpu_get_dmabuf = intel_vgpu_get_dmabuf,
 	.write_protect_handler = intel_vgpu_page_track_handler,
+	.emulate_hotplug = intel_vgpu_emulate_hotplug,
 };
 
 static void init_device_info(struct intel_gvt *gvt)
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 8a4cf995d755..0ba4b42e3bb0 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -536,6 +536,8 @@ int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset,
 int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset,
 		void *p_data, unsigned int bytes);
 
+void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected);
+
 static inline u64 intel_vgpu_get_bar_gpa(struct intel_vgpu *vgpu, int bar)
 {
 	/* We are 64bit bar. */
@@ -577,6 +579,7 @@ struct intel_gvt_ops {
 	int (*vgpu_get_dmabuf)(struct intel_vgpu *vgpu, unsigned int);
 	int (*write_protect_handler)(struct intel_vgpu *, u64, void *,
 				     unsigned int);
+	void (*emulate_hotplug)(struct intel_vgpu *vgpu, bool connected);
 };

From 39c68e87bc50a71bcfe93582d9b0673ef30db418 Mon Sep 17 00:00:00 2001
From: Hang Yuan
Date: Wed, 30 Jan 2019 18:25:54 +0800
Subject: [PATCH 79/89] drm/i915/gvt: add VFIO EDID region

Implement VFIO EDID region for vgpu. Support EDID blob update and
notify guest on link state change via hotplug event.
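For context, a hypothetical userspace sequence driving this region (not
part of the patch; assumes the region offset was already located via
VFIO_DEVICE_GET_REGION_INFO for the GFX/EDID subtype, and that register
accesses are 4 bytes each as the handler below requires):

  #include <stddef.h>
  #include <unistd.h>
  #include <linux/vfio.h>

  static int update_edid(int fd, off_t region, const void *edid, __u32 size)
  {
  	__u32 down = VFIO_DEVICE_GFX_LINK_STATE_DOWN;
  	__u32 up = VFIO_DEVICE_GFX_LINK_STATE_UP;
  	__u32 blob_offset;

  	/* the register part only accepts 4-byte accesses */
  	if (pread(fd, &blob_offset, 4, region +
  		  offsetof(struct vfio_region_gfx_edid, edid_offset)) != 4)
  		return -1;

  	/* unplug, replace blob and size, then replug (hotplug event) */
  	pwrite(fd, &down, 4, region +
  	       offsetof(struct vfio_region_gfx_edid, link_state));
  	pwrite(fd, &size, 4, region +
  	       offsetof(struct vfio_region_gfx_edid, edid_size));
  	pwrite(fd, edid, size, region + blob_offset);
  	pwrite(fd, &up, 4, region +
  	       offsetof(struct vfio_region_gfx_edid, link_state));

  	return 0;
  }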
v3: move struct edid_region to kvmgt.c
v2: add EDID sanity check and size update

Tested-by: Gerd Hoffmann
Reviewed-by: Gerd Hoffmann
Signed-off-by: Hang Yuan
Signed-off-by: Zhenyu Wang
---
 drivers/gpu/drm/i915/gvt/hypercall.h |   1 +
 drivers/gpu/drm/i915/gvt/kvmgt.c     | 143 +++++++++++++++++++++++++++
 drivers/gpu/drm/i915/gvt/mpt.h       |  17 ++++
 drivers/gpu/drm/i915/gvt/vgpu.c      |   6 ++
 4 files changed, 167 insertions(+)

diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h
index 50798868ab15..5e01cc8d9b16 100644
--- a/drivers/gpu/drm/i915/gvt/hypercall.h
+++ b/drivers/gpu/drm/i915/gvt/hypercall.h
@@ -67,6 +67,7 @@ struct intel_gvt_mpt {
 	int (*set_trap_area)(unsigned long handle, u64 start, u64 end,
 			     bool map);
 	int (*set_opregion)(void *vgpu);
+	int (*set_edid)(void *vgpu, int port_num);
 	int (*get_vfio_device)(void *vgpu);
 	void (*put_vfio_device)(void *vgpu);
 	bool (*is_valid_gfn)(unsigned long handle, unsigned long gfn);
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index f8d44e8f86a6..63eef86a2a85 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -57,6 +57,8 @@ static const struct intel_gvt_ops *intel_gvt_ops;
 #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
 #define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
 
+#define EDID_BLOB_OFFSET (PAGE_SIZE/2)
+
 #define OPREGION_SIGNATURE "IntelGraphicsMem"
 
 struct vfio_region;
@@ -76,6 +78,11 @@ struct vfio_region {
 	void				*data;
 };
 
+struct vfio_edid_region {
+	struct vfio_region_gfx_edid vfio_edid_regs;
+	void *edid_blob;
+};
+
 struct kvmgt_pgfn {
 	gfn_t gfn;
 	struct hlist_node hnode;
@@ -427,6 +434,111 @@ static const struct intel_vgpu_regops intel_vgpu_regops_opregion = {
 	.release = intel_vgpu_reg_release_opregion,
 };
 
+static int handle_edid_regs(struct intel_vgpu *vgpu,
+			struct vfio_edid_region *region, char *buf,
+			size_t count, u16 offset, bool is_write)
+{
+	struct vfio_region_gfx_edid *regs = &region->vfio_edid_regs;
+	unsigned int data;
+
+	if (offset + count > sizeof(*regs))
+		return -EINVAL;
+
+	if (count != 4)
+		return -EINVAL;
+
+	if (is_write) {
+		data = *((unsigned int *)buf);
+		switch (offset) {
+		case offsetof(struct vfio_region_gfx_edid, link_state):
+			if (data == VFIO_DEVICE_GFX_LINK_STATE_UP) {
+				if (!drm_edid_block_valid(
+					(u8 *)region->edid_blob,
+					0,
+					true,
+					NULL)) {
+					gvt_vgpu_err("invalid EDID blob\n");
+					return -EINVAL;
+				}
+				intel_gvt_ops->emulate_hotplug(vgpu, true);
+			} else if (data == VFIO_DEVICE_GFX_LINK_STATE_DOWN)
+				intel_gvt_ops->emulate_hotplug(vgpu, false);
+			else {
+				gvt_vgpu_err("invalid EDID link state %d\n",
+					regs->link_state);
+				return -EINVAL;
+			}
+			regs->link_state = data;
+			break;
+		case offsetof(struct vfio_region_gfx_edid, edid_size):
+			if (data > regs->edid_max_size) {
+				gvt_vgpu_err("EDID size is bigger than %d!\n",
+					regs->edid_max_size);
+				return -EINVAL;
+			}
+			regs->edid_size = data;
+			break;
+		default:
+			/* read-only regs */
+			gvt_vgpu_err("write read-only EDID region at offset %d\n",
+				offset);
+			return -EPERM;
+		}
+	} else {
+		memcpy(buf, (char *)regs + offset, count);
+	}
+
+	return count;
+}
+
+static int handle_edid_blob(struct vfio_edid_region *region, char *buf,
+			size_t count, u16 offset, bool is_write)
+{
+	if (offset + count > region->vfio_edid_regs.edid_size)
+		return -EINVAL;
+
+	if (is_write)
+		memcpy(region->edid_blob + offset, buf, count);
+	else
+		memcpy(buf, region->edid_blob + offset, count);
+
+	return count;
+}
+
+static size_t
intel_vgpu_reg_rw_edid(struct intel_vgpu *vgpu, char *buf, + size_t count, loff_t *ppos, bool iswrite) +{ + int ret; + unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - + VFIO_PCI_NUM_REGIONS; + struct vfio_edid_region *region = + (struct vfio_edid_region *)vgpu->vdev.region[i].data; + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + + if (pos < region->vfio_edid_regs.edid_offset) { + ret = handle_edid_regs(vgpu, region, buf, count, pos, iswrite); + } else { + pos -= EDID_BLOB_OFFSET; + ret = handle_edid_blob(region, buf, count, pos, iswrite); + } + + if (ret < 0) + gvt_vgpu_err("failed to access EDID region\n"); + + return ret; +} + +static void intel_vgpu_reg_release_edid(struct intel_vgpu *vgpu, + struct vfio_region *region) +{ + kfree(region->data); +} + +static const struct intel_vgpu_regops intel_vgpu_regops_edid = { + .rw = intel_vgpu_reg_rw_edid, + .release = intel_vgpu_reg_release_edid, +}; + static int intel_vgpu_register_reg(struct intel_vgpu *vgpu, unsigned int type, unsigned int subtype, const struct intel_vgpu_regops *ops, @@ -493,6 +605,36 @@ static int kvmgt_set_opregion(void *p_vgpu) return ret; } +static int kvmgt_set_edid(void *p_vgpu, int port_num) +{ + struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; + struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num); + struct vfio_edid_region *base; + int ret; + + base = kzalloc(sizeof(*base), GFP_KERNEL); + if (!base) + return -ENOMEM; + + /* TODO: Add multi-port and EDID extension block support */ + base->vfio_edid_regs.edid_offset = EDID_BLOB_OFFSET; + base->vfio_edid_regs.edid_max_size = EDID_SIZE; + base->vfio_edid_regs.edid_size = EDID_SIZE; + base->vfio_edid_regs.max_xres = vgpu_edid_xres(port->id); + base->vfio_edid_regs.max_yres = vgpu_edid_yres(port->id); + base->edid_blob = port->edid->edid_block; + + ret = intel_vgpu_register_reg(vgpu, + VFIO_REGION_TYPE_GFX, + VFIO_REGION_SUBTYPE_GFX_EDID, + &intel_vgpu_regops_edid, EDID_SIZE, + VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE | + VFIO_REGION_INFO_FLAG_CAPS, base); + + return ret; +} + static void kvmgt_put_vfio_device(void *vgpu) { if (WARN_ON(!((struct intel_vgpu *)vgpu)->vdev.vfio_device)) @@ -1874,6 +2016,7 @@ static struct intel_gvt_mpt kvmgt_mpt = { .dma_map_guest_page = kvmgt_dma_map_guest_page, .dma_unmap_guest_page = kvmgt_dma_unmap_guest_page, .set_opregion = kvmgt_set_opregion, + .set_edid = kvmgt_set_edid, .get_vfio_device = kvmgt_get_vfio_device, .put_vfio_device = kvmgt_put_vfio_device, .is_valid_gfn = kvmgt_is_valid_gfn, diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 9b4225d44243..5d8b8f228d8f 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -313,6 +313,23 @@ static inline int intel_gvt_hypervisor_set_opregion(struct intel_vgpu *vgpu) return intel_gvt_host.mpt->set_opregion(vgpu); } +/** + * intel_gvt_hypervisor_set_edid - Set EDID region for guest + * @vgpu: a vGPU + * @port_num: display port number + * + * Returns: + * Zero on success, negative error code if failed. 
+ */
+static inline int intel_gvt_hypervisor_set_edid(struct intel_vgpu *vgpu,
+						int port_num)
+{
+	if (!intel_gvt_host.mpt->set_edid)
+		return 0;
+
+	return intel_gvt_host.mpt->set_edid(vgpu, port_num);
+}
+
 /**
  * intel_gvt_hypervisor_get_vfio_device - increase vfio device ref count
  * @vgpu: a vGPU
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index c628be05fbfe..39682b065a71 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -428,6 +428,12 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt,
 	if (ret)
 		goto out_clean_sched_policy;
 
+	/* TODO: add support for more platforms */
+	if (IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv))
+		ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_D);
+	if (ret)
+		goto out_clean_sched_policy;
+
 	return vgpu;
 
 out_clean_sched_policy:

From ad3e7b824c185125f281d56a9e8f614effcdae91 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?=
Date: Wed, 30 Jan 2019 17:51:10 +0200
Subject: [PATCH 80/89] drm/i915: Don't use the second dbuf slice on icl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The code managing the dbuf slices is borked and needs some real work
to fix. In the meantime let's just stop using the second slice.

v2: Drop the change to intel_enabled_dbuf_slices_num() (Mahesh)

Cc: Mahesh Kumar
Reviewed-by: Imre Deak #v1
Signed-off-by: Ville Syrjälä
Link: https://patchwork.freedesktop.org/patch/msgid/20190130155110.12918-1-ville.syrjala@linux.intel.com
Reviewed-by: Mahesh Kumar
---
 drivers/gpu/drm/i915/intel_pm.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 53b706154c94..ed9786241307 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3822,8 +3822,13 @@ static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv,
 
 	/*
 	 * 12GB/s is maximum BW supported by single DBuf slice.
+	 *
+	 * FIXME dbuf slice code is broken:
+	 * - must wait for planes to stop using the slice before powering it off
+	 * - plane straddling both slices is illegal in multi-pipe scenarios
+	 * - should validate we stay within the hw bandwidth limits
 	 */
-	if (num_active > 1 || total_data_bw >= GBps(12)) {
+	if (0 && (num_active > 1 || total_data_bw >= GBps(12))) {
 		ddb->enabled_slices = 2;
 	} else {
 		ddb->enabled_slices = 1;

From 8aae2b1cdf4452817943741ed792068b18191465 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?=
Date: Wed, 30 Jan 2019 20:13:59 +0200
Subject: [PATCH 81/89] drm/i915: Pick the first unused PLL once again
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 5b0bd14dcc6b ("drm/i915/icl: keep track of unused pll while
looping") inadvertently (I presume) changed the code to pick the last
unused dpll rather than the first unused one like we did before.

While there should most likely be no harm in changing the order, let's
change it back just to avoid a change in behaviour. At least it might
reduce the confusion when staring at logs (it took me a while to figure
out why DPLL1 was being picked over DPLL0 when the latter was most
definitely available).
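To make the behavioural difference concrete, here is a distilled sketch
of the selection loop (hypothetical simplified types and names, not the
actual driver structures; only the crtc_mask == 0 branch matters here):

	struct dpll_slot { unsigned int crtc_mask; };

	/* Return the index of the first PLL with no users, or -1. */
	static int first_unused_pll(const struct dpll_slot *pll, int num_plls)
	{
		int i, unused = -1;

		for (i = 0; i < num_plls; i++) {
			if (pll[i].crtc_mask == 0) {
				if (unused < 0) /* keep the first match */
					unused = i;
				continue;
			}
			/* ... otherwise try to share the enabled PLL ... */
		}

		return unused;
	}

Without the "unused < 0" guard, every idle slot overwrites the
remembered index, so the last unused PLL wins instead of the first.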
Cc: Lucas De Marchi
Cc: Paulo Zanoni
Signed-off-by: Ville Syrjälä
Link: https://patchwork.freedesktop.org/patch/msgid/20190130181359.20693-1-ville.syrjala@linux.intel.com
Reviewed-by: Paulo Zanoni
Acked-by: Lucas De Marchi
---
 drivers/gpu/drm/i915/intel_dpll_mgr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c
index 8f70838ac7d8..0a42d11c4c33 100644
--- a/drivers/gpu/drm/i915/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c
@@ -258,7 +258,8 @@ intel_find_shared_dpll(struct intel_crtc *crtc,
 
 		/* Only want to check enabled timings first */
 		if (shared_dpll[i].crtc_mask == 0) {
-			unused_pll = pll;
+			if (!unused_pll)
+				unused_pll = pll;
 			continue;
 		}
 

From 828ccb31cf410a1a01e2bdc2a3cf0dd5aabb755d Mon Sep 17 00:00:00 2001
From: Imre Deak
Date: Mon, 28 Jan 2019 13:42:42 +0200
Subject: [PATCH 82/89] drm/i915/icl: Add TypeC ports only if VBT is present
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We can't safely probe Type C ports, whether they are legacy or
USB/Thunderbolt DP Alternate Type C ports. Probing would require
performing the TypeC connect sequence - as described by the
specification - but that may have unwanted side-effects. These
side-effects include at least - the list is not exhaustive - timeouts
during AUX power well enabling and subsequent PLL enabling errors.

To safely identify these ports we really need VBT, which has the proper
flags for this (ddi_vbt_port_info::supports_typec_usb, supports_tbt).
Based on the above, disable Type C ports if we can't load the VBT for
some reason.

v2:
- Notice that we disable TypeC ports completely and simplify
  accordingly (Jose).
- Add code comment explaining why we disabled the ports. (Jani)

Cc: Jani Nikula
Cc: Paulo Zanoni
Cc: Jose Roberto de Souza
Cc: Ville Syrjälä
Cc: Rodrigo Vivi
Signed-off-by: Imre Deak
Reviewed-by: José Roberto de Souza
Link: https://patchwork.freedesktop.org/patch/msgid/20190128114242.28666-1-imre.deak@intel.com
---
 drivers/gpu/drm/i915/intel_bios.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index 561a4f9f044c..b508d8a735e0 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -1663,6 +1663,13 @@ init_vbt_missing_defaults(struct drm_i915_private *dev_priv)
 		struct ddi_vbt_port_info *info =
 			&dev_priv->vbt.ddi_port_info[port];
 
+		/*
+		 * VBT has the TypeC mode (native,TBT/USB) and we don't want
+		 * to detect it.
+		 */
+		if (intel_port_is_tc(dev_priv, port))
+			continue;
+
 		info->supports_dvi = (port != PORT_A && port != PORT_E);
 		info->supports_hdmi = info->supports_dvi;
 		info->supports_dp = (port != PORT_E);

From 2b34e562361f6e440524272187432c551b57481e Mon Sep 17 00:00:00 2001
From: Imre Deak
Date: Thu, 20 Dec 2018 17:52:11 +0200
Subject: [PATCH 83/89] drm/i915/icl: Work around broken VBTs for port F
 detection

VBT may include incorrect information about the presence of port F.
Work around this on SKUs where we know the port is not present.

v2:
- Fix IS_ICL_WITH_PORT_F, so it's usable from any context.
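In distilled form, the workaround combines a device-ID based SKU check
with the VBT claim (hypothetical helper, illustrative only; 0x8A51 is
the one SKU the macro in the diff below treats as lacking port F):

	/* Trust VBT's port F claim only on SKUs that can have port F. */
	static bool icl_reports_port_f(u32 devid, bool vbt_says_present)
	{
		return devid != 0x8A51 && vbt_says_present;
	}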
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108915
Cc: Mika Kahola
Cc: Jani Nikula
Signed-off-by: Imre Deak
Reviewed-by: Dhinakaran Pandiyan
Link: https://patchwork.freedesktop.org/patch/msgid/20181220155211.31456-1-imre.deak@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h      | 2 ++
 drivers/gpu/drm/i915/intel_display.c | 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 22da9df1f0a7..f11c66d172d3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2353,6 +2353,8 @@ static inline unsigned int i915_sg_segment_size(void)
 				 INTEL_INFO(dev_priv)->gt == 3)
 #define IS_CNL_WITH_PORT_F(dev_priv) (IS_CANNONLAKE(dev_priv) && \
 				      (INTEL_DEVID(dev_priv) & 0x0004) == 0x0004)
+#define IS_ICL_WITH_PORT_F(dev_priv) (IS_ICELAKE(dev_priv) && \
+				      INTEL_DEVID(dev_priv) != 0x8A51)
 
 #define IS_ALPHA_SUPPORT(intel_info) ((intel_info)->is_alpha_support)
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index b0bb8adcf26f..4424b4c90578 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -14376,8 +14376,10 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv)
 		/*
 		 * On some ICL SKUs port F is not present. No strap bits for
 		 * this, so rely on VBT.
+		 * Work around broken VBTs on SKUs known to have no port F.
 		 */
-		if (intel_bios_is_port_present(dev_priv, PORT_F))
+		if (IS_ICL_WITH_PORT_F(dev_priv) &&
+		    intel_bios_is_port_present(dev_priv, PORT_F))
 			intel_ddi_init(dev_priv, PORT_F);
 
 		icl_dsi_init(dev_priv);

From 69b768f2bc6750f785bcf04f87b06e8c27e22de9 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin
Date: Thu, 31 Jan 2019 17:08:42 -0800
Subject: [PATCH 84/89] drm/i915: Move workaround infrastructure code up

Top comment in intel_workarounds.c says common code should come first,
so let's respect that.

Also, by moving the common code together, opportunities to reduce
duplication will become more obvious.
Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1548983324-15344-2-git-send-email-talha.nassar@intel.com --- drivers/gpu/drm/i915/intel_workarounds.c | 74 ++++++++++++------------ 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index 3210ad4e08f7..584c4a5d2674 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -153,6 +153,43 @@ __wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) _wa_add(wal, &wa); } +static void +wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +{ + struct i915_wa wa = { + .reg = reg, + .mask = val, + .val = _MASKED_BIT_ENABLE(val) + }; + + _wa_add(wal, &wa); +} + +static void +wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, + u32 val) +{ + struct i915_wa wa = { + .reg = reg, + .mask = mask, + .val = val + }; + + _wa_add(wal, &wa); +} + +static void +wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +{ + wa_write_masked_or(wal, reg, ~0, val); +} + +static void +wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +{ + wa_write_masked_or(wal, reg, val, val); +} + #define WA_REG(addr, mask, val) __wa_add(wal, (addr), (mask), (val)) #define WA_SET_BIT_MASKED(addr, mask) \ @@ -602,43 +639,6 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) return 0; } -static void -wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) -{ - struct i915_wa wa = { - .reg = reg, - .mask = val, - .val = _MASKED_BIT_ENABLE(val) - }; - - _wa_add(wal, &wa); -} - -static void -wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, - u32 val) -{ - struct i915_wa wa = { - .reg = reg, - .mask = mask, - .val = val - }; - - _wa_add(wal, &wa); -} - -static void -wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val) -{ - wa_write_masked_or(wal, reg, ~0, val); -} - -static void -wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) -{ - wa_write_masked_or(wal, reg, val, val); -} - static void gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { From ae598b0d6b50f5da3d31dc1fab9880b03c631b70 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 31 Jan 2019 17:08:43 -0800 Subject: [PATCH 85/89] drm/i915: Save some lines of source code in workarounds No functional or code size change - just notice we can compact the source by re-using a single helper for adding workarounds. 
Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1548983324-15344-3-git-send-email-talha.nassar@intel.com --- drivers/gpu/drm/i915/intel_workarounds.c | 32 +++++------------------- 1 file changed, 6 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index 584c4a5d2674..5c010551cd23 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -142,7 +142,8 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) } static void -__wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) +wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, + u32 val) { struct i915_wa wa = { .reg = reg, @@ -156,26 +157,7 @@ __wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) static void wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) { - struct i915_wa wa = { - .reg = reg, - .mask = val, - .val = _MASKED_BIT_ENABLE(val) - }; - - _wa_add(wal, &wa); -} - -static void -wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, - u32 val) -{ - struct i915_wa wa = { - .reg = reg, - .mask = mask, - .val = val - }; - - _wa_add(wal, &wa); + wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val)); } static void @@ -190,16 +172,14 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) wa_write_masked_or(wal, reg, val, val); } -#define WA_REG(addr, mask, val) __wa_add(wal, (addr), (mask), (val)) - #define WA_SET_BIT_MASKED(addr, mask) \ - WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) + wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask)) #define WA_CLR_BIT_MASKED(addr, mask) \ - WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask)) + wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask)) #define WA_SET_FIELD_MASKED(addr, mask, value) \ - WA_REG(addr, (mask), _MASKED_FIELD(mask, value)) + wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value))) static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine) { From 0b904c890ac2d9947aad96ca48f0fe5a8d032459 Mon Sep 17 00:00:00 2001 From: Talha Nassar Date: Thu, 31 Jan 2019 17:08:44 -0800 Subject: [PATCH 86/89] drm/i915/icl: restore WaEnableFloatBlendOptimization Enables blend optimization for floating point RTs This restores the workaround that was reverted in c358514ba8da ("Revert "drm/i915/icl: WaEnableFloatBlendOptimization""). The revert was due to the register write seemingly not sticking, but the HW team has confirmed that this is because the register is WO and that the workaround is indeed required. Here the wa is added with a mask of 0 since the register is WO. 
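For background, these masked registers take writes where the high 16
bits select which of the low 16 bits are affected. A sketch of the
encoding and of a simplified readback check (the macros mirror i915's
_MASKED_FIELD() and _MASKED_BIT_ENABLE(); wa_matches() is a stand-in
for the driver's verification, not its actual code):

	#define MASKED_FIELD(mask, value)	(((mask) << 16) | (value))
	#define MASKED_BIT_ENABLE(a)		MASKED_FIELD((a), (a))

	/*
	 * Verification compares only the bits covered by the mask, so an
	 * entry registered with mask == 0 always passes - exactly what a
	 * write-only register needs, since it cannot be read back.
	 */
	static bool wa_matches(u32 mask, u32 want, u32 cur)
	{
		return ((cur ^ want) & mask) == 0;
	}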
References: https://hsdes.intel.com/resource/1408134172
References: https://bugs.freedesktop.org/show_bug.cgi?id=107338
Cc: Chris Wilson
Cc: Mika Kuoppala
Signed-off-by: Talha Nassar
Reviewed-by: Chris Wilson
Signed-off-by: Chris Wilson
Link: https://patchwork.freedesktop.org/patch/msgid/1548983324-15344-4-git-send-email-talha.nassar@intel.com
---
 drivers/gpu/drm/i915/i915_reg.h          | 3 +++
 drivers/gpu/drm/i915/intel_workarounds.c | 6 ++++++
 2 files changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index a64deeb4e517..ede54fdc1676 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2801,6 +2801,9 @@ enum i915_power_well_id {
 #define GEN6_RCS_PWR_FSM _MMIO(0x22ac)
 #define GEN9_RCS_FE_FSM2 _MMIO(0x22a4)
 
+#define GEN10_CACHE_MODE_SS			_MMIO(0xe420)
+#define   FLOAT_BLEND_OPTIMIZATION_ENABLE	(1 << 4)
+
 /* Fuse readout registers for GT */
 #define HSW_PAVP_FUSE1			_MMIO(0x911C)
 #define   HSW_F1_EU_DIS_SHIFT		16
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
index 5c010551cd23..15f4a6dee5aa 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -549,6 +549,12 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine)
 	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
 		WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
 				  GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);
+
+	/* WaEnableFloatBlendOptimization:icl */
+	wa_write_masked_or(wal,
+			   GEN10_CACHE_MODE_SS,
+			   0, /* write-only, so skip validation */
+			   _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));
 }
 
 void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)

From 7360c9f6b857e22a48e545f4e99c79630994e932 Mon Sep 17 00:00:00 2001
From: Hans de Goede
Date: Tue, 29 Jan 2019 15:22:37 +0100
Subject: [PATCH 87/89] drm/i915: Enable fastboot by default on VLV and CHV

We really want to have fastboot enabled by default to avoid an ugly
modeset during boot.

Currently we are enabling fastboot by default on gen9+ (Skylake and
newer). The intention is to enable it on older generations after it
has seen more testing on gen9+.

VLV and CHV devices are still being sold in stores today; as such, it
is desirable to also enable fastboot by default on these now.

I've extensively tested fastboot=1 support on over 50 different Bay-
and Cherry-Trail devices, covering DSI and eDP panels as well as HDMI
output (and even DP over Type-C on one device).

All 50 devices work fine with fastboot=1. On 2 devices the DSI panel
turns black as soon as the i915 driver loads when fastboot=0, so
having fastboot enabled is required for these 2 to work properly (for
lack of a better fix).
Signed-off-by: Hans de Goede
Reviewed-by: Maarten Lankhorst
Link: https://patchwork.freedesktop.org/patch/msgid/20190129142237.8684-1-hdegoede@redhat.com
---
 drivers/gpu/drm/i915/intel_display.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 4424b4c90578..8b36ee183c7e 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -11735,7 +11735,15 @@ static bool fastboot_enabled(struct drm_i915_private *dev_priv)
 		return i915_modparams.fastboot;
 
 	/* Enable fastboot by default on Skylake and newer */
-	return INTEL_GEN(dev_priv) >= 9;
+	if (INTEL_GEN(dev_priv) >= 9)
+		return true;
+
+	/* Enable fastboot by default on VLV and CHV */
+	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+		return true;
+
+	/* Disabled by default on all others */
+	return false;
 }
 
 static bool

From 5e0f5a58b167fc2c8352d90c0faa8c0c9ca75c26 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi
Date: Fri, 1 Feb 2019 15:50:49 -0800
Subject: [PATCH 88/89] drm/i915/cfl: Adding another PCI Device ID.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While cross-checking PCI IDs between the Intel Media SDK and the
kernel, Dmitry noticed this gap. So we checked the spec, and this new
ID had been recently added.

v2: Adding new H_GT1 entry to i915_pci.c (Jose)

Reported-by: Dmitry Rogozhkin
Cc: Dmitry Rogozhkin
Cc: José Roberto de Souza
Signed-off-by: Rodrigo Vivi
Reviewed-by: José Roberto de Souza
Link: https://patchwork.freedesktop.org/patch/msgid/20190201235049.27206-1-rodrigo.vivi@intel.com
---
 drivers/gpu/drm/i915/i915_pci.c | 1 +
 include/drm/i915_pciids.h       | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 6f7ad10f34b8..55c9058e91a8 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -711,6 +711,7 @@ static const struct pci_device_id pciidlist[] = {
 	INTEL_AML_KBL_GT2_IDS(&intel_kabylake_gt2_info),
 	INTEL_CFL_S_GT1_IDS(&intel_coffeelake_gt1_info),
 	INTEL_CFL_S_GT2_IDS(&intel_coffeelake_gt2_info),
+	INTEL_CFL_H_GT1_IDS(&intel_coffeelake_gt1_info),
 	INTEL_CFL_H_GT2_IDS(&intel_coffeelake_gt2_info),
 	INTEL_CFL_U_GT2_IDS(&intel_coffeelake_gt2_info),
 	INTEL_CFL_U_GT3_IDS(&intel_coffeelake_gt3_info),
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index df72be7e8b88..d2fad7b0fcf6 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -394,6 +394,9 @@
 	INTEL_VGA_DEVICE(0x3E9A, info)  /* SRV GT2 */
 
 /* CFL H */
+#define INTEL_CFL_H_GT1_IDS(info) \
+	INTEL_VGA_DEVICE(0x3E9C, info)
+
 #define INTEL_CFL_H_GT2_IDS(info) \
 	INTEL_VGA_DEVICE(0x3E9B, info), /* Halo GT2 */ \
 	INTEL_VGA_DEVICE(0x3E94, info)  /* Halo GT2 */
@@ -426,6 +429,7 @@
 #define INTEL_CFL_IDS(info) \
 	INTEL_CFL_S_GT1_IDS(info), \
 	INTEL_CFL_S_GT2_IDS(info), \
+	INTEL_CFL_H_GT1_IDS(info), \
 	INTEL_CFL_H_GT2_IDS(info), \
 	INTEL_CFL_U_GT2_IDS(info), \
 	INTEL_CFL_U_GT3_IDS(info), \

From 46c0cd8c562bc3e4a99cbaa4ba0904b6871b7b4b Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi
Date: Sat, 2 Feb 2019 00:14:28 -0800
Subject: [PATCH 89/89] drm/i915: Update DRIVER_DATE to 20190202

Signed-off-by: Rodrigo Vivi
---
 drivers/gpu/drm/i915/i915_drv.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f11c66d172d3..534e52e3a8da 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -91,8 +91,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20190129" -#define DRIVER_TIMESTAMP 1548812351 +#define DRIVER_DATE "20190202" +#define DRIVER_TIMESTAMP 1549095268 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions