Merge tag 'drm-intel-next-2019-02-02' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

- Make background color and LUT more robust (Matt)
- Icelake display fixes (Ville, Imre)
- Workarounds fixes and reorg (Tvrtko, Talha)
- Enable fastboot by default on VLV and CHV (Hans)
- Add another PCI ID for Coffee Lake (Rodrigo)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190202082911.GA6615@intel.com
Committed by Dave Airlie on 2019-02-04 15:37:52 +10:00 (commit 2cc3b81dfa)
88 changed files with 4822 additions and 3483 deletions

View file

@@ -474,10 +474,9 @@ EXPORT_SYMBOL(drm_plane_create_color_properties);
  *
  * Returns 0 on success, -EINVAL on failure.
  */
-int drm_color_lut_check(struct drm_property_blob *lut,
-			uint32_t tests)
+int drm_color_lut_check(const struct drm_property_blob *lut, u32 tests)
 {
-	struct drm_color_lut *entry;
+	const struct drm_color_lut *entry;
 	int i;
 
 	if (!lut || !tests)
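
For context, a minimal sketch (not part of this diff) of how a driver's atomic_check path might call the reworked helper; example_check_gamma is a hypothetical name, and the test flags are the ones declared alongside drm_color_lut_check in drm_color_mgmt.h:

/*
 * Hedged sketch: reject a gamma LUT whose R/G/B entries differ or that
 * decreases, which hardware with a single-channel, monotonic LUT cannot
 * program. Assumes <drm/drm_color_mgmt.h> and a struct drm_crtc_state.
 */
static int example_check_gamma(struct drm_crtc_state *crtc_state)
{
	return drm_color_lut_check(crtc_state->gamma_lut,
				   DRM_COLOR_LUT_EQUAL_CHANNELS |
				   DRM_COLOR_LUT_NON_DECREASING);
}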

View file

@@ -22,6 +22,7 @@ subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable)
 subdir-ccflags-y += $(call cc-disable-warning, sign-compare)
 subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized)
 subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides)
+subdir-ccflags-y += $(call cc-disable-warning, uninitialized)
 subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror
 
 # Fine grained warnings disable

View file

@@ -342,6 +342,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num,
 	port->dpcd->data_valid = true;
 	port->dpcd->data[DPCD_SINK_COUNT] = 0x1;
 	port->type = type;
+	port->id = resolution;
 
 	emulate_monitor_status_change(vgpu);
 
@@ -444,6 +445,36 @@ void intel_gvt_emulate_vblank(struct intel_gvt *gvt)
 	mutex_unlock(&gvt->lock);
 }
 
+/**
+ * intel_vgpu_emulate_hotplug - trigger hotplug event for vGPU
+ * @vgpu: a vGPU
+ * @conncted: link state
+ *
+ * This function is used to trigger hotplug interrupt for vGPU
+ *
+ */
+void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected)
+{
+	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
+
+	/* TODO: add more platforms support */
+	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
+		if (connected) {
+			vgpu_vreg_t(vgpu, SFUSE_STRAP) |=
+				SFUSE_STRAP_DDID_DETECTED;
+			vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT;
+		} else {
+			vgpu_vreg_t(vgpu, SFUSE_STRAP) &=
+				~SFUSE_STRAP_DDID_DETECTED;
+			vgpu_vreg_t(vgpu, SDEISR) &= ~SDE_PORTD_HOTPLUG_CPT;
+		}
+		vgpu_vreg_t(vgpu, SDEIIR) |= SDE_PORTD_HOTPLUG_CPT;
+		vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |=
+				PORTD_HOTPLUG_STATUS_MASK;
+		intel_vgpu_trigger_virtual_event(vgpu, DP_D_HOTPLUG);
+	}
+}
+
 /**
  * intel_vgpu_clean_display - clean vGPU virtual display emulation
  * @vgpu: a vGPU

View file

@@ -146,18 +146,19 @@ enum intel_vgpu_port_type {
 	GVT_PORT_MAX
 };
 
+enum intel_vgpu_edid {
+	GVT_EDID_1024_768,
+	GVT_EDID_1920_1200,
+	GVT_EDID_NUM,
+};
+
 struct intel_vgpu_port {
 	/* per display EDID information */
 	struct intel_vgpu_edid_data *edid;
 	/* per display DPCD information */
 	struct intel_vgpu_dpcd_data *dpcd;
 	int type;
+	enum intel_vgpu_edid id;
 };
 
-enum intel_vgpu_edid {
-	GVT_EDID_1024_768,
-	GVT_EDID_1920_1200,
-	GVT_EDID_NUM,
-};
-
 static inline char *vgpu_edid_str(enum intel_vgpu_edid id)
@@ -172,6 +173,30 @@ static inline char *vgpu_edid_str(enum intel_vgpu_edid id)
 	}
 }
 
+static inline unsigned int vgpu_edid_xres(enum intel_vgpu_edid id)
+{
+	switch (id) {
+	case GVT_EDID_1024_768:
+		return 1024;
+	case GVT_EDID_1920_1200:
+		return 1920;
+	default:
+		return 0;
+	}
+}
+
+static inline unsigned int vgpu_edid_yres(enum intel_vgpu_edid id)
+{
+	switch (id) {
+	case GVT_EDID_1024_768:
+		return 768;
+	case GVT_EDID_1920_1200:
+		return 1200;
+	default:
+		return 0;
+	}
+}
+
 void intel_gvt_emulate_vblank(struct intel_gvt *gvt);
 void intel_gvt_check_vblank_emulation(struct intel_gvt *gvt);

View file

@@ -185,6 +185,7 @@ static const struct intel_gvt_ops intel_gvt_ops = {
 	.vgpu_query_plane = intel_vgpu_query_plane,
 	.vgpu_get_dmabuf = intel_vgpu_get_dmabuf,
 	.write_protect_handler = intel_vgpu_page_track_handler,
+	.emulate_hotplug = intel_vgpu_emulate_hotplug,
 };
 
 static void init_device_info(struct intel_gvt *gvt)

View file

@@ -536,6 +536,8 @@ int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset,
 int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset,
 		void *p_data, unsigned int bytes);
 
+void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected);
+
 static inline u64 intel_vgpu_get_bar_gpa(struct intel_vgpu *vgpu, int bar)
 {
 	/* We are 64bit bar. */
@@ -577,6 +579,7 @@ struct intel_gvt_ops {
 	int (*vgpu_get_dmabuf)(struct intel_vgpu *vgpu, unsigned int);
 	int (*write_protect_handler)(struct intel_vgpu *, u64, void *,
				     unsigned int);
+	void (*emulate_hotplug)(struct intel_vgpu *vgpu, bool connected);
 };

View file

@@ -67,6 +67,7 @@ struct intel_gvt_mpt {
 	int (*set_trap_area)(unsigned long handle, u64 start, u64 end,
			     bool map);
 	int (*set_opregion)(void *vgpu);
+	int (*set_edid)(void *vgpu, int port_num);
 	int (*get_vfio_device)(void *vgpu);
 	void (*put_vfio_device)(void *vgpu);
 	bool (*is_valid_gfn)(unsigned long handle, unsigned long gfn);

View file

@@ -57,6 +57,8 @@ static const struct intel_gvt_ops *intel_gvt_ops;
 #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
 #define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
 
+#define EDID_BLOB_OFFSET (PAGE_SIZE/2)
+
 #define OPREGION_SIGNATURE "IntelGraphicsMem"
 
 struct vfio_region;
@@ -76,6 +78,11 @@ struct vfio_region {
 	void *data;
 };
 
+struct vfio_edid_region {
+	struct vfio_region_gfx_edid vfio_edid_regs;
+	void *edid_blob;
+};
+
 struct kvmgt_pgfn {
 	gfn_t gfn;
 	struct hlist_node hnode;
@@ -427,6 +434,111 @@ static const struct intel_vgpu_regops intel_vgpu_regops_opregion = {
 	.release = intel_vgpu_reg_release_opregion,
 };
 
+static int handle_edid_regs(struct intel_vgpu *vgpu,
+			struct vfio_edid_region *region, char *buf,
+			size_t count, u16 offset, bool is_write)
+{
+	struct vfio_region_gfx_edid *regs = &region->vfio_edid_regs;
+	unsigned int data;
+
+	if (offset + count > sizeof(*regs))
+		return -EINVAL;
+
+	if (count != 4)
+		return -EINVAL;
+
+	if (is_write) {
+		data = *((unsigned int *)buf);
+		switch (offset) {
+		case offsetof(struct vfio_region_gfx_edid, link_state):
+			if (data == VFIO_DEVICE_GFX_LINK_STATE_UP) {
+				if (!drm_edid_block_valid(
+					(u8 *)region->edid_blob,
+					0,
+					true,
+					NULL)) {
+					gvt_vgpu_err("invalid EDID blob\n");
+					return -EINVAL;
+				}
+				intel_gvt_ops->emulate_hotplug(vgpu, true);
+			} else if (data == VFIO_DEVICE_GFX_LINK_STATE_DOWN)
+				intel_gvt_ops->emulate_hotplug(vgpu, false);
+			else {
+				gvt_vgpu_err("invalid EDID link state %d\n",
+					regs->link_state);
+				return -EINVAL;
+			}
+			regs->link_state = data;
+			break;
+		case offsetof(struct vfio_region_gfx_edid, edid_size):
+			if (data > regs->edid_max_size) {
+				gvt_vgpu_err("EDID size is bigger than %d!\n",
+					regs->edid_max_size);
+				return -EINVAL;
+			}
+			regs->edid_size = data;
+			break;
+		default:
+			/* read-only regs */
+			gvt_vgpu_err("write read-only EDID region at offset %d\n",
+				offset);
+			return -EPERM;
+		}
+	} else {
+		memcpy(buf, (char *)regs + offset, count);
+	}
+
+	return count;
+}
+
+static int handle_edid_blob(struct vfio_edid_region *region, char *buf,
+			size_t count, u16 offset, bool is_write)
+{
+	if (offset + count > region->vfio_edid_regs.edid_size)
+		return -EINVAL;
+
+	if (is_write)
+		memcpy(region->edid_blob + offset, buf, count);
+	else
+		memcpy(buf, region->edid_blob + offset, count);
+
+	return count;
+}
+
+static size_t intel_vgpu_reg_rw_edid(struct intel_vgpu *vgpu, char *buf,
+		size_t count, loff_t *ppos, bool iswrite)
+{
+	int ret;
+	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
+			VFIO_PCI_NUM_REGIONS;
+	struct vfio_edid_region *region =
+		(struct vfio_edid_region *)vgpu->vdev.region[i].data;
+	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
+
+	if (pos < region->vfio_edid_regs.edid_offset) {
+		ret = handle_edid_regs(vgpu, region, buf, count, pos, iswrite);
+	} else {
+		pos -= EDID_BLOB_OFFSET;
+		ret = handle_edid_blob(region, buf, count, pos, iswrite);
+	}
+
+	if (ret < 0)
+		gvt_vgpu_err("failed to access EDID region\n");
+
+	return ret;
+}
+
+static void intel_vgpu_reg_release_edid(struct intel_vgpu *vgpu,
+					struct vfio_region *region)
+{
+	kfree(region->data);
+}
+
+static const struct intel_vgpu_regops intel_vgpu_regops_edid = {
+	.rw = intel_vgpu_reg_rw_edid,
+	.release = intel_vgpu_reg_release_edid,
+};
+
 static int intel_vgpu_register_reg(struct intel_vgpu *vgpu,
 		unsigned int type, unsigned int subtype,
 		const struct intel_vgpu_regops *ops,
@@ -493,6 +605,36 @@ static int kvmgt_set_opregion(void *p_vgpu)
 	return ret;
 }
 
+static int kvmgt_set_edid(void *p_vgpu, int port_num)
+{
+	struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;
+	struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num);
+	struct vfio_edid_region *base;
+	int ret;
+
+	base = kzalloc(sizeof(*base), GFP_KERNEL);
+	if (!base)
+		return -ENOMEM;
+
+	/* TODO: Add multi-port and EDID extension block support */
+	base->vfio_edid_regs.edid_offset = EDID_BLOB_OFFSET;
+	base->vfio_edid_regs.edid_max_size = EDID_SIZE;
+	base->vfio_edid_regs.edid_size = EDID_SIZE;
+	base->vfio_edid_regs.max_xres = vgpu_edid_xres(port->id);
+	base->vfio_edid_regs.max_yres = vgpu_edid_yres(port->id);
+	base->edid_blob = port->edid->edid_block;
+
+	ret = intel_vgpu_register_reg(vgpu,
+			VFIO_REGION_TYPE_GFX,
+			VFIO_REGION_SUBTYPE_GFX_EDID,
+			&intel_vgpu_regops_edid, EDID_SIZE,
+			VFIO_REGION_INFO_FLAG_READ |
+			VFIO_REGION_INFO_FLAG_WRITE |
+			VFIO_REGION_INFO_FLAG_CAPS, base);
+
+	return ret;
+}
+
 static void kvmgt_put_vfio_device(void *vgpu)
 {
 	if (WARN_ON(!((struct intel_vgpu *)vgpu)->vdev.vfio_device))
@@ -1874,6 +2016,7 @@ static struct intel_gvt_mpt kvmgt_mpt = {
 	.dma_map_guest_page = kvmgt_dma_map_guest_page,
 	.dma_unmap_guest_page = kvmgt_dma_unmap_guest_page,
 	.set_opregion = kvmgt_set_opregion,
+	.set_edid = kvmgt_set_edid,
 	.get_vfio_device = kvmgt_get_vfio_device,
 	.put_vfio_device = kvmgt_put_vfio_device,
 	.is_valid_gfn = kvmgt_is_valid_gfn,
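
As a hedged userspace sketch (not from this diff), this is roughly how a VMM such as QEMU might drive the new vfio EDID region; device_fd and region_offset are assumed to have been obtained beforehand via VFIO_DEVICE_GET_REGION_INFO, and error handling is abbreviated:

/* Sketch only: update the guest EDID and trigger the emulated hotplug. */
#include <stddef.h>
#include <stdint.h>
#include <unistd.h>
#include <linux/vfio.h>

static int example_set_edid(int device_fd, off_t region_offset,
			    const uint8_t *edid, uint32_t edid_len)
{
	struct vfio_region_gfx_edid regs;
	uint32_t state;

	/* Read the register block at the start of the region. */
	if (pread(device_fd, &regs, sizeof(regs), region_offset) != sizeof(regs))
		return -1;
	if (edid_len > regs.edid_max_size)
		return -1;

	/* Bring the link down, update size and blob, then bring it up;
	 * the link-up write is what triggers the emulated hotplug above. */
	state = VFIO_DEVICE_GFX_LINK_STATE_DOWN;
	pwrite(device_fd, &state, sizeof(state),
	       region_offset + offsetof(struct vfio_region_gfx_edid, link_state));
	pwrite(device_fd, &edid_len, sizeof(edid_len),
	       region_offset + offsetof(struct vfio_region_gfx_edid, edid_size));
	pwrite(device_fd, edid, edid_len, region_offset + regs.edid_offset);
	state = VFIO_DEVICE_GFX_LINK_STATE_UP;
	pwrite(device_fd, &state, sizeof(state),
	       region_offset + offsetof(struct vfio_region_gfx_edid, link_state));
	return 0;
}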

View file

@@ -313,6 +313,23 @@ static inline int intel_gvt_hypervisor_set_opregion(struct intel_vgpu *vgpu)
 	return intel_gvt_host.mpt->set_opregion(vgpu);
 }
 
+/**
+ * intel_gvt_hypervisor_set_edid - Set EDID region for guest
+ * @vgpu: a vGPU
+ * @port_num: display port number
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ */
+static inline int intel_gvt_hypervisor_set_edid(struct intel_vgpu *vgpu,
+						int port_num)
+{
+	if (!intel_gvt_host.mpt->set_edid)
+		return 0;
+
+	return intel_gvt_host.mpt->set_edid(vgpu, port_num);
+}
+
 /**
  * intel_gvt_hypervisor_get_vfio_device - increase vfio device ref count
  * @vgpu: a vGPU

View file

@@ -428,6 +428,12 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt,
 	if (ret)
 		goto out_clean_sched_policy;
 
+	/*TODO: add more platforms support */
+	if (IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv))
+		ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_D);
+	if (ret)
+		goto out_clean_sched_policy;
+
 	return vgpu;
 
 out_clean_sched_policy:

View file

@@ -160,14 +160,14 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		   obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : "");
 	if (obj->base.name)
 		seq_printf(m, " (name: %d)", obj->base.name);
-	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+	list_for_each_entry(vma, &obj->vma.list, obj_link) {
 		if (i915_vma_is_pinned(vma))
 			pin_count++;
 	}
 	seq_printf(m, " (pinned x %d)", pin_count);
 	if (obj->pin_global)
 		seq_printf(m, " (global)");
-	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+	list_for_each_entry(vma, &obj->vma.list, obj_link) {
 		if (!drm_mm_node_allocated(&vma->node))
 			continue;
@@ -323,7 +323,7 @@ static int per_file_stats(int id, void *ptr, void *data)
 	if (obj->base.name || obj->base.dma_buf)
 		stats->shared += obj->base.size;
 
-	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+	list_for_each_entry(vma, &obj->vma.list, obj_link) {
 		if (!drm_mm_node_allocated(&vma->node))
 			continue;
@@ -1285,8 +1285,6 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 		seq_puts(m, "Wedged\n");
 	if (test_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags))
 		seq_puts(m, "Reset in progress: struct_mutex backoff\n");
-	if (test_bit(I915_RESET_HANDOFF, &dev_priv->gpu_error.flags))
-		seq_puts(m, "Reset in progress: reset handoff to waiter\n");
 	if (waitqueue_active(&dev_priv->gpu_error.wait_queue))
 		seq_puts(m, "Waiter holding struct mutex\n");
 	if (waitqueue_active(&dev_priv->gpu_error.reset_queue))
@@ -1318,37 +1316,16 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 	seq_printf(m, "GT active? %s\n", yesno(dev_priv->gt.awake));
 
 	for_each_engine(engine, dev_priv, id) {
-		struct intel_breadcrumbs *b = &engine->breadcrumbs;
-		struct rb_node *rb;
-
 		seq_printf(m, "%s:\n", engine->name);
-		seq_printf(m, "\tseqno = %x [current %x, last %x]\n",
-			   engine->hangcheck.seqno, seqno[id],
-			   intel_engine_last_submit(engine));
-		seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? %s, wedged? %s\n",
-			   yesno(intel_engine_has_waiter(engine)),
-			   yesno(test_bit(engine->id,
-					  &dev_priv->gpu_error.missed_irq_rings)),
-			   yesno(engine->hangcheck.stalled),
-			   yesno(engine->hangcheck.wedged));
-
-		spin_lock_irq(&b->rb_lock);
-		for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
-			struct intel_wait *w = rb_entry(rb, typeof(*w), node);
-
-			seq_printf(m, "\t%s [%d] waiting for %x\n",
-				   w->tsk->comm, w->tsk->pid, w->seqno);
-		}
-		spin_unlock_irq(&b->rb_lock);
+		seq_printf(m, "\tseqno = %x [current %x, last %x], %dms ago\n",
+			   engine->hangcheck.seqno, seqno[id],
+			   intel_engine_last_submit(engine),
+			   jiffies_to_msecs(jiffies -
					    engine->hangcheck.action_timestamp));
 
 		seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
 			   (long long)engine->hangcheck.acthd,
 			   (long long)acthd[id]);
-		seq_printf(m, "\taction = %s(%d) %d ms ago\n",
-			   hangcheck_action_to_str(engine->hangcheck.action),
-			   engine->hangcheck.action,
-			   jiffies_to_msecs(jiffies -
-					    engine->hangcheck.action_timestamp));
 
 		if (engine->id == RCS) {
 			seq_puts(m, "\tinstdone read =\n");
@@ -2029,18 +2006,6 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
 	return 0;
 }
 
-static int count_irq_waiters(struct drm_i915_private *i915)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	int count = 0;
-
-	for_each_engine(engine, i915, id)
-		count += intel_engine_has_waiter(engine);
-
-	return count;
-}
-
 static const char *rps_power_to_str(unsigned int power)
 {
 	static const char * const strings[] = {
@@ -2080,7 +2045,6 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 	seq_printf(m, "RPS enabled? %d\n", rps->enabled);
 	seq_printf(m, "GPU busy? %s [%d requests]\n",
 		   yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
-	seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
 	seq_printf(m, "Boosts outstanding? %d\n",
 		   atomic_read(&rps->num_waiters));
 	seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive));
@@ -3912,8 +3876,6 @@ static int
 i915_wedged_set(void *data, u64 val)
 {
 	struct drm_i915_private *i915 = data;
-	struct intel_engine_cs *engine;
-	unsigned int tmp;
 
 	/*
 	 * There is no safeguard against this debugfs entry colliding
@@ -3926,18 +3888,8 @@ i915_wedged_set(void *data, u64 val)
 	if (i915_reset_backoff(&i915->gpu_error))
 		return -EAGAIN;
 
-	for_each_engine_masked(engine, i915, val, tmp) {
-		engine->hangcheck.seqno = intel_engine_get_seqno(engine);
-		engine->hangcheck.stalled = true;
-	}
-
 	i915_handle_error(i915, val, I915_ERROR_CAPTURE,
 			  "Manually set wedged engine mask = %llx", val);
-
-	wait_on_bit(&i915->gpu_error.flags,
-		    I915_RESET_HANDOFF,
-		    TASK_UNINTERRUPTIBLE);
 
 	return 0;
 }
@@ -3945,94 +3897,6 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
 			i915_wedged_get, i915_wedged_set,
 			"%llu\n");
 
-static int
-fault_irq_set(struct drm_i915_private *i915,
-	      unsigned long *irq,
-	      unsigned long val)
-{
-	int err;
-
-	err = mutex_lock_interruptible(&i915->drm.struct_mutex);
-	if (err)
-		return err;
-
-	err = i915_gem_wait_for_idle(i915,
-				     I915_WAIT_LOCKED |
-				     I915_WAIT_INTERRUPTIBLE,
-				     MAX_SCHEDULE_TIMEOUT);
-	if (err)
-		goto err_unlock;
-
-	*irq = val;
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	/* Flush idle worker to disarm irq */
-	drain_delayed_work(&i915->gt.idle_work);
-
-	return 0;
-
-err_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-	return err;
-}
-
-static int
-i915_ring_missed_irq_get(void *data, u64 *val)
-{
-	struct drm_i915_private *dev_priv = data;
-
-	*val = dev_priv->gpu_error.missed_irq_rings;
-	return 0;
-}
-
-static int
-i915_ring_missed_irq_set(void *data, u64 val)
-{
-	struct drm_i915_private *i915 = data;
-
-	return fault_irq_set(i915, &i915->gpu_error.missed_irq_rings, val);
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(i915_ring_missed_irq_fops,
-			i915_ring_missed_irq_get, i915_ring_missed_irq_set,
-			"0x%08llx\n");
-
-static int
-i915_ring_test_irq_get(void *data, u64 *val)
-{
-	struct drm_i915_private *dev_priv = data;
-
-	*val = dev_priv->gpu_error.test_irq_rings;
-	return 0;
-}
-
-static int
-i915_ring_test_irq_set(void *data, u64 val)
-{
-	struct drm_i915_private *i915 = data;
-
-	/* GuC keeps the user interrupt permanently enabled for submission */
-	if (USES_GUC_SUBMISSION(i915))
-		return -ENODEV;
-
-	/*
-	 * From icl, we can no longer individually mask interrupt generation
-	 * from each engine.
-	 */
-	if (INTEL_GEN(i915) >= 11)
-		return -ENODEV;
-
-	val &= INTEL_INFO(i915)->ring_mask;
-	DRM_DEBUG_DRIVER("Masking interrupts on rings 0x%08llx\n", val);
-
-	return fault_irq_set(i915, &i915->gpu_error.test_irq_rings, val);
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops,
-			i915_ring_test_irq_get, i915_ring_test_irq_set,
-			"0x%08llx\n");
-
 #define DROP_UNBOUND	BIT(0)
 #define DROP_BOUND	BIT(1)
 #define DROP_RETIRE	BIT(2)
@@ -4070,7 +3934,8 @@ i915_drop_caches_set(void *data, u64 val)
 		  val, val & DROP_ALL);
 	wakeref = intel_runtime_pm_get(i915);
 
-	if (val & DROP_RESET_ACTIVE && !intel_engines_are_idle(i915))
+	if (val & DROP_RESET_ACTIVE &&
+	    wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT))
 		i915_gem_set_wedged(i915);
 
 	/* No need to check and wait for gpu resets, only libdrm auto-restarts
@@ -4092,13 +3957,8 @@ i915_drop_caches_set(void *data, u64 val)
 		mutex_unlock(&i915->drm.struct_mutex);
 	}
 
-	if (val & DROP_RESET_ACTIVE &&
-	    i915_terminally_wedged(&i915->gpu_error)) {
+	if (val & DROP_RESET_ACTIVE && i915_terminally_wedged(&i915->gpu_error))
 		i915_handle_error(i915, ALL_ENGINES, 0, NULL);
-		wait_on_bit(&i915->gpu_error.flags,
-			    I915_RESET_HANDOFF,
-			    TASK_UNINTERRUPTIBLE);
-	}
 
 	fs_reclaim_acquire(GFP_KERNEL);
 	if (val & DROP_BOUND)
@@ -4800,8 +4660,6 @@ static const struct i915_debugfs_files {
 } i915_debugfs_files[] = {
 	{"i915_wedged", &i915_wedged_fops},
 	{"i915_cache_sharing", &i915_cache_sharing_fops},
-	{"i915_ring_missed_irq", &i915_ring_missed_irq_fops},
-	{"i915_ring_test_irq", &i915_ring_test_irq_fops},
 	{"i915_gem_drop_caches", &i915_drop_caches_fops},
 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
 	{"i915_error_state", &i915_error_state_fops},

View file

@@ -91,8 +91,8 @@
 #define DRIVER_NAME		"i915"
 #define DRIVER_DESC		"Intel Graphics"
-#define DRIVER_DATE		"20190124"
-#define DRIVER_TIMESTAMP	1548370857
+#define DRIVER_DATE		"20190202"
+#define DRIVER_TIMESTAMP	1549095268
 
 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and
  * WARN_ON()) for hw state sanity checks to check for unexpected conditions
@@ -1114,6 +1114,7 @@ struct skl_ddb_values {
 };
 
 struct skl_wm_level {
+	u16 min_ddb_alloc;
 	u16 plane_res_b;
 	u8 plane_res_l;
 	bool plane_en;
@@ -1975,7 +1976,14 @@ struct drm_i915_private {
 		void (*resume)(struct drm_i915_private *);
 		void (*cleanup_engine)(struct intel_engine_cs *engine);
 
-		struct list_head timelines;
+		struct i915_gt_timelines {
+			struct mutex mutex; /* protects list, tainted by GPU */
+			struct list_head active_list;
+
+			/* Pack multiple timelines' seqnos into the same page */
+			spinlock_t hwsp_lock;
+			struct list_head hwsp_free_list;
+		} timelines;
 
 		struct list_head active_rings;
 		struct list_head closed_vma;
@@ -2345,6 +2353,8 @@ static inline unsigned int i915_sg_segment_size(void)
 				 INTEL_INFO(dev_priv)->gt == 3)
 #define IS_CNL_WITH_PORT_F(dev_priv) (IS_CANNONLAKE(dev_priv) && \
 				      (INTEL_DEVID(dev_priv) & 0x0004) == 0x0004)
+#define IS_ICL_WITH_PORT_F(dev_priv) (IS_ICELAKE(dev_priv) && \
+				      INTEL_DEVID(dev_priv) != 0x8A51)
 
 #define IS_ALPHA_SUPPORT(intel_info) ((intel_info)->is_alpha_support)
@@ -3001,11 +3011,6 @@ static inline bool i915_reset_backoff(struct i915_gpu_error *error)
 	return unlikely(test_bit(I915_RESET_BACKOFF, &error->flags));
 }
 
-static inline bool i915_reset_handoff(struct i915_gpu_error *error)
-{
-	return unlikely(test_bit(I915_RESET_HANDOFF, &error->flags));
-}
-
 static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
 {
 	return unlikely(test_bit(I915_WEDGED, &error->flags));

View file

@@ -247,21 +247,19 @@ int
 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
+	struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
 	struct drm_i915_gem_get_aperture *args = data;
 	struct i915_vma *vma;
 	u64 pinned;
 
+	mutex_lock(&ggtt->vm.mutex);
+
 	pinned = ggtt->vm.reserved;
-	mutex_lock(&dev->struct_mutex);
-	list_for_each_entry(vma, &ggtt->vm.active_list, vm_link)
+	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
 		if (i915_vma_is_pinned(vma))
 			pinned += vma->node.size;
-	list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link)
-		if (i915_vma_is_pinned(vma))
-			pinned += vma->node.size;
-	mutex_unlock(&dev->struct_mutex);
+
+	mutex_unlock(&ggtt->vm.mutex);
 
 	args->aper_size = ggtt->vm.total;
 	args->aper_available_size = args->aper_size - pinned;
@@ -441,15 +439,19 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 	if (ret)
 		return ret;
 
-	while ((vma = list_first_entry_or_null(&obj->vma_list,
+	spin_lock(&obj->vma.lock);
+	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
 					       struct i915_vma,
 					       obj_link))) {
 		list_move_tail(&vma->obj_link, &still_in_list);
+		spin_unlock(&obj->vma.lock);
+
 		ret = i915_vma_unbind(vma);
-		if (ret)
-			break;
+
+		spin_lock(&obj->vma.lock);
 	}
-	list_splice(&still_in_list, &obj->vma_list);
+	list_splice(&still_in_list, &obj->vma.list);
+	spin_unlock(&obj->vma.lock);
 
 	return ret;
 }
@@ -659,11 +661,6 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj,
 		     struct intel_rps_client *rps_client)
 {
 	might_sleep();
-#if IS_ENABLED(CONFIG_LOCKDEP)
-	GEM_BUG_ON(debug_locks &&
-		   !!lockdep_is_held(&obj->base.dev->struct_mutex) !=
-		   !!(flags & I915_WAIT_LOCKED));
-#endif
 	GEM_BUG_ON(timeout < 0);
 
 	timeout = i915_gem_object_wait_reservation(obj->resv,
@@ -1539,23 +1536,21 @@ err:
 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 {
-	struct drm_i915_private *i915;
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	struct list_head *list;
 	struct i915_vma *vma;
 
 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
 
+	mutex_lock(&i915->ggtt.vm.mutex);
 	for_each_ggtt_vma(vma, obj) {
-		if (i915_vma_is_active(vma))
-			continue;
-
 		if (!drm_mm_node_allocated(&vma->node))
 			continue;
 
-		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
+		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
 	}
+	mutex_unlock(&i915->ggtt.vm.mutex);
 
-	i915 = to_i915(obj->base.dev);
 	spin_lock(&i915->mm.obj_lock);
 	list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
 	list_move_tail(&obj->mm.link, list);
@@ -2878,6 +2873,14 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
 	return 0;
 }
 
+static bool match_ring(struct i915_request *rq)
+{
+	struct drm_i915_private *dev_priv = rq->i915;
+	u32 ring = I915_READ(RING_START(rq->engine->mmio_base));
+
+	return ring == i915_ggtt_offset(rq->ring->vma);
+}
+
 struct i915_request *
 i915_gem_find_active_request(struct intel_engine_cs *engine)
 {
@@ -2897,9 +2900,16 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
 	 */
 	spin_lock_irqsave(&engine->timeline.lock, flags);
 	list_for_each_entry(request, &engine->timeline.requests, link) {
-		if (__i915_request_completed(request, request->global_seqno))
+		if (i915_request_completed(request))
 			continue;
 
+		if (!i915_request_started(request))
+			break;
+
+		/* More than one preemptible request may match! */
+		if (!match_ring(request))
+			break;
+
 		active = request;
 		break;
 	}
@@ -3229,14 +3239,38 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	return ret;
 }
 
-static long wait_for_timeline(struct i915_timeline *tl,
-			      unsigned int flags, long timeout)
+static int wait_for_engines(struct drm_i915_private *i915)
 {
-	struct i915_request *rq;
+	if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
+		dev_err(i915->drm.dev,
+			"Failed to idle engines, declaring wedged!\n");
+		GEM_TRACE_DUMP();
+		i915_gem_set_wedged(i915);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static long
+wait_for_timelines(struct drm_i915_private *i915,
+		   unsigned int flags, long timeout)
+{
+	struct i915_gt_timelines *gt = &i915->gt.timelines;
+	struct i915_timeline *tl;
+
+	if (!READ_ONCE(i915->gt.active_requests))
+		return timeout;
+
+	mutex_lock(&gt->mutex);
+	list_for_each_entry(tl, &gt->active_list, link) {
+		struct i915_request *rq;
 
-	rq = i915_gem_active_get_unlocked(&tl->last_request);
-	if (!rq)
-		return timeout;
+		rq = i915_gem_active_get_unlocked(&tl->last_request);
+		if (!rq)
+			continue;
+
+		mutex_unlock(&gt->mutex);
 
 	/*
 	 * "Race-to-idle".
@@ -3252,23 +3286,18 @@ static long wait_for_timeline(struct i915_timeline *tl,
 		timeout = i915_request_wait(rq, flags, timeout);
 		i915_request_put(rq);
-		if (timeout < 0)
-			return timeout;
+
+		/* restart after reacquiring the lock */
+		mutex_lock(&gt->mutex);
+		tl = list_entry(&gt->active_list, typeof(*tl), link);
+	}
+	mutex_unlock(&gt->mutex);
 
 	return timeout;
 }
 
-static int wait_for_engines(struct drm_i915_private *i915)
-{
-	if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
-		dev_err(i915->drm.dev,
-			"Failed to idle engines, declaring wedged!\n");
-		GEM_TRACE_DUMP();
-		i915_gem_set_wedged(i915);
-		return -EIO;
-	}
-
-	return 0;
-}
-
 int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 			   unsigned int flags, long timeout)
 {
@@ -3280,17 +3309,15 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 	if (!READ_ONCE(i915->gt.awake))
 		return 0;
 
+	timeout = wait_for_timelines(i915, flags, timeout);
+	if (timeout < 0)
+		return timeout;
+
 	if (flags & I915_WAIT_LOCKED) {
-		struct i915_timeline *tl;
 		int err;
 
 		lockdep_assert_held(&i915->drm.struct_mutex);
 
-		list_for_each_entry(tl, &i915->gt.timelines, link) {
-			timeout = wait_for_timeline(tl, flags, timeout);
-			if (timeout < 0)
-				return timeout;
-		}
-
 		if (GEM_SHOW_DEBUG() && !timeout) {
 			/* Presume that timeout was non-zero to begin with! */
 			dev_warn(&i915->drm.pdev->dev,
@@ -3304,17 +3331,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 
 		i915_retire_requests(i915);
 		GEM_BUG_ON(i915->gt.active_requests);
-	} else {
-		struct intel_engine_cs *engine;
-		enum intel_engine_id id;
-
-		for_each_engine(engine, i915, id) {
-			struct i915_timeline *tl = &engine->timeline;
-
-			timeout = wait_for_timeline(tl, flags, timeout);
-			if (timeout < 0)
-				return timeout;
-		}
 	}
 
 	return 0;
@@ -3500,7 +3516,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 	 * reading an invalid PTE on older architectures.
 	 */
 restart:
-	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+	list_for_each_entry(vma, &obj->vma.list, obj_link) {
 		if (!drm_mm_node_allocated(&vma->node))
 			continue;
@@ -3578,7 +3594,7 @@ restart:
 		 */
 	}
 
-	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+	list_for_each_entry(vma, &obj->vma.list, obj_link) {
 		if (!drm_mm_node_allocated(&vma->node))
 			continue;
@@ -3588,7 +3604,7 @@ restart:
 		}
 	}
 
-	list_for_each_entry(vma, &obj->vma_list, obj_link)
+	list_for_each_entry(vma, &obj->vma.list, obj_link)
 		vma->node.color = cache_level;
 	i915_gem_object_set_cache_coherency(obj, cache_level);
 	obj->cache_dirty = true; /* Always invalidate stale cachelines */
@@ -4164,7 +4180,9 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 {
 	mutex_init(&obj->mm.lock);
 
-	INIT_LIST_HEAD(&obj->vma_list);
+	spin_lock_init(&obj->vma.lock);
+	INIT_LIST_HEAD(&obj->vma.list);
+
 	INIT_LIST_HEAD(&obj->lut_list);
 	INIT_LIST_HEAD(&obj->batch_pool_link);
@@ -4330,14 +4348,13 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 		mutex_lock(&i915->drm.struct_mutex);
 
 		GEM_BUG_ON(i915_gem_object_is_active(obj));
-		list_for_each_entry_safe(vma, vn,
-					 &obj->vma_list, obj_link) {
+		list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
 			GEM_BUG_ON(i915_vma_is_active(vma));
 			vma->flags &= ~I915_VMA_PIN_MASK;
 			i915_vma_destroy(vma);
 		}
-		GEM_BUG_ON(!list_empty(&obj->vma_list));
-		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree));
+		GEM_BUG_ON(!list_empty(&obj->vma.list));
+		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
 
 		/* This serializes freeing with the shrinker. Since the free
 		 * is delayed, first by RCU then by the workqueue, we want the
@@ -4495,8 +4512,6 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 	GEM_TRACE("\n");
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	wakeref = intel_runtime_pm_get(i915);
 	intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
@@ -4522,6 +4537,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
 	intel_runtime_pm_put(i915, wakeref);
 
+	mutex_lock(&i915->drm.struct_mutex);
 	i915_gem_contexts_lost(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 }
@@ -4536,6 +4552,8 @@ int i915_gem_suspend(struct drm_i915_private *i915)
 	wakeref = intel_runtime_pm_get(i915);
 	intel_suspend_gt_powersave(i915);
 
+	flush_workqueue(i915->wq);
+
 	mutex_lock(&i915->drm.struct_mutex);
 
 	/*
@@ -4565,11 +4583,9 @@ int i915_gem_suspend(struct drm_i915_private *i915)
 	i915_retire_requests(i915); /* ensure we flush after wedging */
 
 	mutex_unlock(&i915->drm.struct_mutex);
+	i915_reset_flush(i915);
 
-	intel_uc_suspend(i915);
-
-	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
-	cancel_delayed_work_sync(&i915->gt.retire_work);
+	drain_delayed_work(&i915->gt.retire_work);
 
 	/*
 	 * As the idle_work is rearming if it detects a race, play safe and
@@ -4577,6 +4593,8 @@ int i915_gem_suspend(struct drm_i915_private *i915)
 	 */
 	drain_delayed_work(&i915->gt.idle_work);
 
+	intel_uc_suspend(i915);
+
 	/*
 	 * Assert that we successfully flushed all the work and
 	 * reset the GPU back to its idle, low power state.
@@ -5013,6 +5031,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
 	}
 
+	i915_timelines_init(dev_priv);
+
 	ret = i915_gem_init_userptr(dev_priv);
 	if (ret)
 		return ret;
@@ -5135,8 +5155,10 @@ err_unlock:
 err_uc_misc:
 	intel_uc_fini_misc(dev_priv);
 
-	if (ret != -EIO)
+	if (ret != -EIO) {
 		i915_gem_cleanup_userptr(dev_priv);
+		i915_timelines_fini(dev_priv);
+	}
 
 	if (ret == -EIO) {
 		mutex_lock(&dev_priv->drm.struct_mutex);
@@ -5187,6 +5209,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)
 	intel_uc_fini_misc(dev_priv);
 	i915_gem_cleanup_userptr(dev_priv);
+	i915_timelines_fini(dev_priv);
 
 	i915_gem_drain_freed_objects(dev_priv);
@@ -5289,7 +5312,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
 	if (!dev_priv->priorities)
 		goto err_dependencies;
 
-	INIT_LIST_HEAD(&dev_priv->gt.timelines);
 	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
 	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
@@ -5333,7 +5355,6 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
 	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
 	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
 	WARN_ON(dev_priv->mm.object_count);
-	WARN_ON(!list_empty(&dev_priv->gt.timelines));
 
 	kmem_cache_destroy(dev_priv->priorities);
 	kmem_cache_destroy(dev_priv->dependencies);

View file

@@ -327,6 +327,9 @@ intel_context_init(struct intel_context *ce,
 		   struct intel_engine_cs *engine)
 {
 	ce->gem_context = ctx;
+
+	INIT_LIST_HEAD(&ce->signal_link);
+	INIT_LIST_HEAD(&ce->signals);
 }
 
 static struct i915_gem_context *

View file

@@ -164,6 +164,8 @@ struct i915_gem_context {
 	struct intel_context {
 		struct i915_gem_context *gem_context;
 		struct intel_engine_cs *active;
+		struct list_head signal_link;
+		struct list_head signals;
 		struct i915_vma *state;
 		struct intel_ring *ring;
 		u32 *lrc_reg_state;

View file

@@ -126,31 +126,25 @@ i915_gem_evict_something(struct i915_address_space *vm,
 	struct drm_i915_private *dev_priv = vm->i915;
 	struct drm_mm_scan scan;
 	struct list_head eviction_list;
-	struct list_head *phases[] = {
-		&vm->inactive_list,
-		&vm->active_list,
-		NULL,
-	}, **phase;
 	struct i915_vma *vma, *next;
 	struct drm_mm_node *node;
 	enum drm_mm_insert_mode mode;
+	struct i915_vma *active;
 	int ret;
 
 	lockdep_assert_held(&vm->i915->drm.struct_mutex);
 	trace_i915_gem_evict(vm, min_size, alignment, flags);
 
 	/*
-	 * The goal is to evict objects and amalgamate space in LRU order.
-	 * The oldest idle objects reside on the inactive list, which is in
-	 * retirement order. The next objects to retire are those in flight,
-	 * on the active list, again in retirement order.
+	 * The goal is to evict objects and amalgamate space in rough LRU order.
+	 * Since both active and inactive objects reside on the same list,
+	 * in a mix of creation and last scanned order, as we process the list
+	 * we sort it into inactive/active, which keeps the active portion
+	 * in a rough MRU order.
 	 *
 	 * The retirement sequence is thus:
-	 * 1. Inactive objects (already retired)
-	 * 2. Active objects (will stall on unbinding)
-	 *
-	 * On each list, the oldest objects lie at the HEAD with the freshest
-	 * object on the TAIL.
+	 * 1. Inactive objects (already retired, random order)
+	 * 2. Active objects (will stall on unbinding, oldest scanned first)
 	 */
 	mode = DRM_MM_INSERT_BEST;
 	if (flags & PIN_HIGH)
@@ -169,17 +163,46 @@ i915_gem_evict_something(struct i915_address_space *vm,
 	 */
 	if (!(flags & PIN_NONBLOCK))
 		i915_retire_requests(dev_priv);
-	else
-		phases[1] = NULL;
 
 search_again:
+	active = NULL;
 	INIT_LIST_HEAD(&eviction_list);
-	phase = phases;
-	do {
-		list_for_each_entry(vma, *phase, vm_link)
-			if (mark_free(&scan, vma, flags, &eviction_list))
-				goto found;
-	} while (*++phase);
+	list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) {
+		/*
+		 * We keep this list in a rough least-recently scanned order
+		 * of active elements (inactive elements are cheap to reap).
+		 * New entries are added to the end, and we move anything we
+		 * scan to the end. The assumption is that the working set
+		 * of applications is either steady state (and thanks to the
+		 * userspace bo cache it almost always is) or volatile and
+		 * frequently replaced after a frame, which are self-evicting!
+		 * Given that assumption, the MRU order of the scan list is
+		 * fairly static, and keeping it in least-recently scan order
+		 * is suitable.
+		 *
+		 * To notice when we complete one full cycle, we record the
+		 * first active element seen, before moving it to the tail.
+		 */
+		if (i915_vma_is_active(vma)) {
+			if (vma == active) {
+				if (flags & PIN_NONBLOCK)
+					break;
+
+				active = ERR_PTR(-EAGAIN);
+			}
+
+			if (active != ERR_PTR(-EAGAIN)) {
+				if (!active)
+					active = vma;
+
+				list_move_tail(&vma->vm_link, &vm->bound_list);
+				continue;
+			}
+		}
+
+		if (mark_free(&scan, vma, flags, &eviction_list))
+			goto found;
+	}
 
 	/* Nothing found, clean up and bail out! */
 	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
@@ -388,11 +411,6 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
  */
 int i915_gem_evict_vm(struct i915_address_space *vm)
 {
-	struct list_head *phases[] = {
-		&vm->inactive_list,
-		&vm->active_list,
-		NULL
-	}, **phase;
 	struct list_head eviction_list;
 	struct i915_vma *vma, *next;
 	int ret;
@@ -412,16 +430,15 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
 	}
 
 	INIT_LIST_HEAD(&eviction_list);
-	phase = phases;
-	do {
-		list_for_each_entry(vma, *phase, vm_link) {
-			if (i915_vma_is_pinned(vma))
-				continue;
-
-			__i915_vma_pin(vma);
-			list_add(&vma->evict_link, &eviction_list);
-		}
-	} while (*++phase);
+	mutex_lock(&vm->mutex);
+	list_for_each_entry(vma, &vm->bound_list, vm_link) {
+		if (i915_vma_is_pinned(vma))
+			continue;
+
+		__i915_vma_pin(vma);
+		list_add(&vma->evict_link, &eviction_list);
+	}
+	mutex_unlock(&vm->mutex);
 
 	ret = 0;
 	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {

View file

@@ -1976,6 +1976,18 @@ static int eb_submit(struct i915_execbuffer *eb)
 			return err;
 	}
 
+	/*
+	 * After we completed waiting for other engines (using HW semaphores)
+	 * then we can signal that this request/batch is ready to run. This
+	 * allows us to determine if the batch is still waiting on the GPU
+	 * or actually running by checking the breadcrumb.
+	 */
+	if (eb->engine->emit_init_breadcrumb) {
+		err = eb->engine->emit_init_breadcrumb(eb->request);
+		if (err)
+			return err;
+	}
+
 	err = eb->engine->emit_bb_start(eb->request,
 					eb->batch->node.start +
 					eb->batch_start_offset,

View file

@@ -50,4 +50,3 @@ struct drm_i915_fence_reg {
 };
 
 #endif
-

View file

@@ -491,9 +491,8 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass)
 
 	stash_init(&vm->free_pages);
 
-	INIT_LIST_HEAD(&vm->active_list);
-	INIT_LIST_HEAD(&vm->inactive_list);
 	INIT_LIST_HEAD(&vm->unbound_list);
+	INIT_LIST_HEAD(&vm->bound_list);
 }
 
 static void i915_address_space_fini(struct i915_address_space *vm)
@@ -1932,7 +1931,10 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
 	vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
 
 	INIT_LIST_HEAD(&vma->obj_link);
+
+	mutex_lock(&vma->vm->mutex);
 	list_add(&vma->vm_link, &vma->vm->unbound_list);
+	mutex_unlock(&vma->vm->mutex);
 
 	return vma;
 }
@@ -2111,8 +2113,7 @@ void i915_ppgtt_close(struct i915_address_space *vm)
 static void ppgtt_destroy_vma(struct i915_address_space *vm)
 {
 	struct list_head *phases[] = {
-		&vm->active_list,
-		&vm->inactive_list,
+		&vm->bound_list,
 		&vm->unbound_list,
 		NULL,
 	}, **phase;
@@ -2135,8 +2136,7 @@ void i915_ppgtt_release(struct kref *kref)
 
 	ppgtt_destroy_vma(&ppgtt->vm);
 
-	GEM_BUG_ON(!list_empty(&ppgtt->vm.active_list));
-	GEM_BUG_ON(!list_empty(&ppgtt->vm.inactive_list));
+	GEM_BUG_ON(!list_empty(&ppgtt->vm.bound_list));
 	GEM_BUG_ON(!list_empty(&ppgtt->vm.unbound_list));
 
 	ppgtt->vm.cleanup(&ppgtt->vm);
@@ -2801,8 +2801,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
 	mutex_lock(&dev_priv->drm.struct_mutex);
 	i915_gem_fini_aliasing_ppgtt(dev_priv);
 
-	GEM_BUG_ON(!list_empty(&ggtt->vm.active_list));
-	list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link)
+	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
 		WARN_ON(i915_vma_unbind(vma));
 
 	if (drm_mm_node_allocated(&ggtt->error_capture))
@@ -3508,32 +3507,39 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
 
 	i915_check_and_clear_faults(dev_priv);
 
+	mutex_lock(&ggtt->vm.mutex);
+
 	/* First fill our portion of the GTT with scratch pages */
 	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
 
 	ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */
 
 	/* clflush objects bound into the GGTT and rebind them. */
-	GEM_BUG_ON(!list_empty(&ggtt->vm.active_list));
-	list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link) {
+	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
 		struct drm_i915_gem_object *obj = vma->obj;
 
 		if (!(vma->flags & I915_VMA_GLOBAL_BIND))
 			continue;
 
+		mutex_unlock(&ggtt->vm.mutex);
+
 		if (!i915_vma_unbind(vma))
-			continue;
+			goto lock;
 
 		WARN_ON(i915_vma_bind(vma,
 				      obj ? obj->cache_level : 0,
 				      PIN_UPDATE));
 		if (obj)
 			WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
+
+lock:
+		mutex_lock(&ggtt->vm.mutex);
 	}
 
 	ggtt->vm.closed = false;
 	i915_ggtt_invalidate(dev_priv);
 
+	mutex_unlock(&ggtt->vm.mutex);
+
 	if (INTEL_GEN(dev_priv) >= 8) {
 		struct intel_ppat *ppat = &dev_priv->ppat;

View file

@@ -39,6 +39,7 @@
 #include <linux/pagevec.h>
 
 #include "i915_request.h"
+#include "i915_reset.h"
 #include "i915_selftest.h"
 #include "i915_timeline.h"
@@ -298,32 +299,12 @@ struct i915_address_space {
 	struct i915_page_directory_pointer *scratch_pdp; /* GEN8+ & 48b PPGTT */
 
 	/**
-	 * List of objects currently involved in rendering.
-	 *
-	 * Includes buffers having the contents of their GPU caches
-	 * flushed, not necessarily primitives. last_read_req
-	 * represents when the rendering involved will be completed.
-	 *
-	 * A reference is held on the buffer while on this list.
+	 * List of vma currently bound.
 	 */
-	struct list_head active_list;
+	struct list_head bound_list;
 
 	/**
-	 * LRU list of objects which are not in the ringbuffer and
-	 * are ready to unbind, but are still in the GTT.
-	 *
-	 * last_read_req is NULL while an object is in this list.
-	 *
-	 * A reference is not held on the buffer while on this list,
-	 * as merely being GTT-bound shouldn't prevent its being
-	 * freed, and we'll pull it off the list in the free path.
-	 */
-	struct list_head inactive_list;
-
-	/**
-	 * List of vma that have been unbound.
-	 *
-	 * A reference is not held on the buffer while on this list.
+	 * List of vma that are not unbound.
 	 */
 	struct list_head unbound_list;
@@ -661,19 +642,19 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 
 /* Flags used by pin/bind&friends. */
 #define PIN_NONBLOCK		BIT_ULL(0)
-#define PIN_MAPPABLE		BIT_ULL(1)
-#define PIN_ZONE_4G		BIT_ULL(2)
-#define PIN_NONFAULT		BIT_ULL(3)
-#define PIN_NOEVICT		BIT_ULL(4)
+#define PIN_NONFAULT		BIT_ULL(1)
+#define PIN_NOEVICT		BIT_ULL(2)
+#define PIN_MAPPABLE		BIT_ULL(3)
+#define PIN_ZONE_4G		BIT_ULL(4)
+#define PIN_HIGH		BIT_ULL(5)
+#define PIN_OFFSET_BIAS		BIT_ULL(6)
+#define PIN_OFFSET_FIXED	BIT_ULL(7)
 
-#define PIN_MBZ			BIT_ULL(5) /* I915_VMA_PIN_OVERFLOW */
-#define PIN_GLOBAL		BIT_ULL(6) /* I915_VMA_GLOBAL_BIND */
-#define PIN_USER		BIT_ULL(7) /* I915_VMA_LOCAL_BIND */
-#define PIN_UPDATE		BIT_ULL(8)
-#define PIN_HIGH		BIT_ULL(9)
-#define PIN_OFFSET_BIAS		BIT_ULL(10)
-#define PIN_OFFSET_FIXED	BIT_ULL(11)
+#define PIN_MBZ			BIT_ULL(8) /* I915_VMA_PIN_OVERFLOW */
+#define PIN_GLOBAL		BIT_ULL(9) /* I915_VMA_GLOBAL_BIND */
+#define PIN_USER		BIT_ULL(10) /* I915_VMA_LOCAL_BIND */
+#define PIN_UPDATE		BIT_ULL(11)
 
 #define PIN_OFFSET_MASK		(-I915_GTT_PAGE_SIZE)
 
 #endif

View file

@@ -87,24 +87,33 @@ struct drm_i915_gem_object {
 
 	const struct drm_i915_gem_object_ops *ops;
 
-	/**
-	 * @vma_list: List of VMAs backed by this object
-	 *
-	 * The VMA on this list are ordered by type, all GGTT vma are placed
-	 * at the head and all ppGTT vma are placed at the tail. The different
-	 * types of GGTT vma are unordered between themselves, use the
-	 * @vma_tree (which has a defined order between all VMA) to find an
-	 * exact match.
-	 */
-	struct list_head vma_list;
-
-	/**
-	 * @vma_tree: Ordered tree of VMAs backed by this object
-	 *
-	 * All VMA created for this object are placed in the @vma_tree for
-	 * fast retrieval via a binary search in i915_vma_instance().
-	 * They are also added to @vma_list for easy iteration.
-	 */
-	struct rb_root vma_tree;
+	struct {
+		/**
+		 * @vma.lock: protect the list/tree of vmas
+		 */
+		spinlock_t lock;
+
+		/**
+		 * @vma.list: List of VMAs backed by this object
+		 *
+		 * The VMA on this list are ordered by type, all GGTT vma are
+		 * placed at the head and all ppGTT vma are placed at the tail.
+		 * The different types of GGTT vma are unordered between
+		 * themselves, use the @vma.tree (which has a defined order
+		 * between all VMA) to quickly find an exact match.
+		 */
+		struct list_head list;
+
+		/**
+		 * @vma.tree: Ordered tree of VMAs backed by this object
+		 *
+		 * All VMA created for this object are placed in the @vma.tree
+		 * for fast retrieval via a binary search in
+		 * i915_vma_instance(). They are also added to @vma.list for
+		 * easy iteration.
+		 */
+		struct rb_root tree;
+	} vma;
 
 	/**
 	 * @lut_list: List of vma lookup entries in use for this object.

View file

@@ -461,12 +461,20 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 				       I915_SHRINK_VMAPS);
 
 	/* We also want to clear any cached iomaps as they wrap vmap */
+	mutex_lock(&i915->ggtt.vm.mutex);
 	list_for_each_entry_safe(vma, next,
-				 &i915->ggtt.vm.inactive_list, vm_link) {
+				 &i915->ggtt.vm.bound_list, vm_link) {
 		unsigned long count = vma->node.size >> PAGE_SHIFT;
-		if (vma->iomap && i915_vma_unbind(vma) == 0)
+
+		if (!vma->iomap || i915_vma_is_active(vma))
+			continue;
+
+		mutex_unlock(&i915->ggtt.vm.mutex);
+		if (i915_vma_unbind(vma) == 0)
 			freed_pages += count;
+		mutex_lock(&i915->ggtt.vm.mutex);
 	}
+	mutex_unlock(&i915->ggtt.vm.mutex);
 
 out:
 	shrinker_unlock(i915, unlock);

View file

@ -701,7 +701,10 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
vma->pages = obj->mm.pages; vma->pages = obj->mm.pages;
vma->flags |= I915_VMA_GLOBAL_BIND; vma->flags |= I915_VMA_GLOBAL_BIND;
__i915_vma_set_map_and_fenceable(vma); __i915_vma_set_map_and_fenceable(vma);
list_move_tail(&vma->vm_link, &ggtt->vm.inactive_list);
mutex_lock(&ggtt->vm.mutex);
list_move_tail(&vma->vm_link, &ggtt->vm.bound_list);
mutex_unlock(&ggtt->vm.mutex);
spin_lock(&dev_priv->mm.obj_lock); spin_lock(&dev_priv->mm.obj_lock);
list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list);

View file

@ -447,9 +447,14 @@ static void error_print_request(struct drm_i915_error_state_buf *m,
if (!erq->seqno) if (!erq->seqno)
return; return;
err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n", err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x%s%s, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n",
prefix, erq->pid, erq->ban_score, prefix, erq->pid, erq->ban_score,
erq->context, erq->seqno, erq->sched_attr.priority, erq->context, erq->seqno,
test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
&erq->flags) ? "!" : "",
test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
&erq->flags) ? "+" : "",
erq->sched_attr.priority,
jiffies_to_msecs(erq->jiffies - epoch), jiffies_to_msecs(erq->jiffies - epoch),
erq->start, erq->head, erq->tail); erq->start, erq->head, erq->tail);
} }
@ -530,13 +535,9 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
} }
err_printf(m, " seqno: 0x%08x\n", ee->seqno); err_printf(m, " seqno: 0x%08x\n", ee->seqno);
err_printf(m, " last_seqno: 0x%08x\n", ee->last_seqno); err_printf(m, " last_seqno: 0x%08x\n", ee->last_seqno);
err_printf(m, " waiting: %s\n", yesno(ee->waiting));
err_printf(m, " ring->head: 0x%08x\n", ee->cpu_ring_head); err_printf(m, " ring->head: 0x%08x\n", ee->cpu_ring_head);
err_printf(m, " ring->tail: 0x%08x\n", ee->cpu_ring_tail); err_printf(m, " ring->tail: 0x%08x\n", ee->cpu_ring_tail);
err_printf(m, " hangcheck stall: %s\n", yesno(ee->hangcheck_stalled)); err_printf(m, " hangcheck timestamp: %dms (%lu%s)\n",
err_printf(m, " hangcheck action: %s\n",
hangcheck_action_to_str(ee->hangcheck_action));
err_printf(m, " hangcheck action timestamp: %dms (%lu%s)\n",
jiffies_to_msecs(ee->hangcheck_timestamp - epoch), jiffies_to_msecs(ee->hangcheck_timestamp - epoch),
ee->hangcheck_timestamp, ee->hangcheck_timestamp,
ee->hangcheck_timestamp == epoch ? "; epoch" : ""); ee->hangcheck_timestamp == epoch ? "; epoch" : "");
@ -684,8 +685,9 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
jiffies_to_msecs(error->capture - error->epoch)); jiffies_to_msecs(error->capture - error->epoch));
for (i = 0; i < ARRAY_SIZE(error->engine); i++) { for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
if (error->engine[i].hangcheck_stalled && if (!error->engine[i].context.pid)
error->engine[i].context.pid) { continue;
err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n", err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n",
engine_name(m->i915, i), engine_name(m->i915, i),
error->engine[i].context.comm, error->engine[i].context.comm,
@ -693,7 +695,6 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
error->engine[i].context.ban_score, error->engine[i].context.ban_score,
bannable(&error->engine[i].context)); bannable(&error->engine[i].context));
} }
}
err_printf(m, "Reset count: %u\n", error->reset_count); err_printf(m, "Reset count: %u\n", error->reset_count);
err_printf(m, "Suspend count: %u\n", error->suspend_count); err_printf(m, "Suspend count: %u\n", error->suspend_count);
err_printf(m, "Platform: %s\n", intel_platform_name(error->device_info.platform)); err_printf(m, "Platform: %s\n", intel_platform_name(error->device_info.platform));
@ -722,8 +723,6 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake); err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake);
err_printf(m, "DERRMR: 0x%08x\n", error->derrmr); err_printf(m, "DERRMR: 0x%08x\n", error->derrmr);
err_printf(m, "CCID: 0x%08x\n", error->ccid); err_printf(m, "CCID: 0x%08x\n", error->ccid);
err_printf(m, "Missed interrupts: 0x%08lx\n",
m->i915->gpu_error.missed_irq_rings);
for (i = 0; i < error->nfence; i++) for (i = 0; i < error->nfence; i++)
err_printf(m, " fence[%d] = %08llx\n", i, error->fence[i]); err_printf(m, " fence[%d] = %08llx\n", i, error->fence[i]);
@ -807,21 +806,6 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
error->epoch); error->epoch);
} }
if (IS_ERR(ee->waiters)) {
err_printf(m, "%s --- ? waiters [unable to acquire spinlock]\n",
m->i915->engine[i]->name);
} else if (ee->num_waiters) {
err_printf(m, "%s --- %d waiters\n",
m->i915->engine[i]->name,
ee->num_waiters);
for (j = 0; j < ee->num_waiters; j++) {
err_printf(m, " seqno 0x%08x for %s [%d]\n",
ee->waiters[j].seqno,
ee->waiters[j].comm,
ee->waiters[j].pid);
}
}
print_error_obj(m, m->i915->engine[i], print_error_obj(m, m->i915->engine[i],
"ringbuffer", ee->ringbuffer); "ringbuffer", ee->ringbuffer);
@ -1003,8 +987,6 @@ void __i915_gpu_state_free(struct kref *error_ref)
i915_error_object_free(ee->wa_ctx); i915_error_object_free(ee->wa_ctx);
kfree(ee->requests); kfree(ee->requests);
if (!IS_ERR_OR_NULL(ee->waiters))
kfree(ee->waiters);
} }
for (i = 0; i < ARRAY_SIZE(error->active_bo); i++) for (i = 0; i < ARRAY_SIZE(error->active_bo); i++)
@ -1124,7 +1106,9 @@ static void capture_bo(struct drm_i915_error_buffer *err,
static u32 capture_error_bo(struct drm_i915_error_buffer *err, static u32 capture_error_bo(struct drm_i915_error_buffer *err,
int count, struct list_head *head, int count, struct list_head *head,
bool pinned_only) unsigned int flags)
#define ACTIVE_ONLY BIT(0)
#define PINNED_ONLY BIT(1)
{ {
struct i915_vma *vma; struct i915_vma *vma;
int i = 0; int i = 0;
@ -1133,7 +1117,10 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err,
if (!vma->obj) if (!vma->obj)
continue; continue;
if (pinned_only && !i915_vma_is_pinned(vma)) if (flags & ACTIVE_ONLY && !i915_vma_is_active(vma))
continue;
if (flags & PINNED_ONLY && !i915_vma_is_pinned(vma))
continue; continue;
capture_bo(err++, vma); capture_bo(err++, vma);
@ -1144,7 +1131,8 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err,
return i; return i;
} }
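
With capture_error_bo() taking ACTIVE_ONLY/PINNED_ONLY filter bits instead of a bool, one walk over the single bound list can serve both the active and the pinned capture paths. A compact, purely illustrative sketch of that flag-filter pattern over a plain array (hypothetical types, not the driver's structs):

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define FILTER_ACTIVE_ONLY (1u << 0)
#define FILTER_PINNED_ONLY (1u << 1)

struct fake_vma {
	bool active;
	bool pinned;
};

/* Copy out the entries that satisfy every requested filter bit, up to 'count'. */
static size_t capture(const struct fake_vma *in, size_t n,
		      struct fake_vma *out, size_t count, unsigned int flags)
{
	size_t i, written = 0;

	for (i = 0; i < n && written < count; i++) {
		if ((flags & FILTER_ACTIVE_ONLY) && !in[i].active)
			continue;
		if ((flags & FILTER_PINNED_ONLY) && !in[i].pinned)
			continue;
		out[written++] = in[i];
	}
	return written;
}

int main(void)
{
	const struct fake_vma vmas[] = {
		{ .active = true,  .pinned = false },
		{ .active = false, .pinned = true  },
		{ .active = true,  .pinned = true  },
	};
	struct fake_vma out[3];

	printf("active: %zu, pinned: %zu\n",
	       capture(vmas, 3, out, 3, FILTER_ACTIVE_ONLY),
	       capture(vmas, 3, out, 3, FILTER_PINNED_ONLY));
	return 0;
}
```
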
/* Generate a semi-unique error code. The code is not meant to have meaning. The /*
* Generate a semi-unique error code. The code is not meant to have meaning. The
* code's only purpose is to try to prevent false duplicated bug reports by * code's only purpose is to try to prevent false duplicated bug reports by
* grossly estimating a GPU error state. * grossly estimating a GPU error state.
* *
@ -1153,29 +1141,23 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err,
* *
* It's only a small step better than a random number in its current form. * It's only a small step better than a random number in its current form.
*/ */
static u32 i915_error_generate_code(struct drm_i915_private *dev_priv, static u32 i915_error_generate_code(struct i915_gpu_state *error,
struct i915_gpu_state *error, unsigned long engine_mask)
int *engine_id)
{ {
u32 error_code = 0; /*
int i; * IPEHR would be an ideal way to detect errors, as it's the gross
/* IPEHR would be an ideal way to detect errors, as it's the gross
* measure of "the command that hung." However, it has some very common * measure of "the command that hung." However, it has some very common
* synchronization commands which almost always appear when the hang is * synchronization commands which almost always appear when the hang is
* strictly a client bug. Use instdone to help differentiate those cases. * strictly a client bug. Use instdone to help differentiate those cases.
*/ */
for (i = 0; i < I915_NUM_ENGINES; i++) { if (engine_mask) {
if (error->engine[i].hangcheck_stalled) { struct drm_i915_error_engine *ee =
if (engine_id) &error->engine[ffs(engine_mask)];
*engine_id = i;
return error->engine[i].ipehr ^ return ee->ipehr ^ ee->instdone.instdone;
error->engine[i].instdone.instdone;
}
} }
return error_code; return 0;
} }
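
With the rework, the error code boils down to a single xor of the first flagged engine's IPEHR against its INSTDONE, or 0 when no engine is flagged. A stand-alone sketch with invented register values; 0-based indexing is used here, and the driver's own engine indexing may differ:

```c
#include <stdint.h>
#include <stdio.h>
#include <strings.h>		/* ffs() */

/* Hypothetical per-engine capture; only the two fields the ecode needs. */
struct fake_engine_state {
	uint32_t ipehr;
	uint32_t instdone;
};

/* Pick the first engine in the mask and xor its two registers together. */
static uint32_t generate_code(const struct fake_engine_state *engines,
			      unsigned long engine_mask)
{
	if (!engine_mask)
		return 0;

	const struct fake_engine_state *ee = &engines[ffs(engine_mask) - 1];

	return ee->ipehr ^ ee->instdone;
}

int main(void)
{
	struct fake_engine_state engines[4] = {
		[2] = { .ipehr = 0x0b160001, .instdone = 0xffffffff },
	};

	/* Pretend engine 2 hung: the ecode is its IPEHR ^ INSTDONE. */
	printf("ecode 0x%08x\n", (unsigned)generate_code(engines, 1ul << 2));
	return 0;
}
```
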
static void gem_record_fences(struct i915_gpu_state *error) static void gem_record_fences(struct i915_gpu_state *error)
@ -1208,59 +1190,6 @@ static void gen6_record_semaphore_state(struct intel_engine_cs *engine,
I915_READ(RING_SYNC_2(engine->mmio_base)); I915_READ(RING_SYNC_2(engine->mmio_base));
} }
static void error_record_engine_waiters(struct intel_engine_cs *engine,
struct drm_i915_error_engine *ee)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
struct drm_i915_error_waiter *waiter;
struct rb_node *rb;
int count;
ee->num_waiters = 0;
ee->waiters = NULL;
if (RB_EMPTY_ROOT(&b->waiters))
return;
if (!spin_trylock_irq(&b->rb_lock)) {
ee->waiters = ERR_PTR(-EDEADLK);
return;
}
count = 0;
for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb))
count++;
spin_unlock_irq(&b->rb_lock);
waiter = NULL;
if (count)
waiter = kmalloc_array(count,
sizeof(struct drm_i915_error_waiter),
GFP_ATOMIC);
if (!waiter)
return;
if (!spin_trylock_irq(&b->rb_lock)) {
kfree(waiter);
ee->waiters = ERR_PTR(-EDEADLK);
return;
}
ee->waiters = waiter;
for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
struct intel_wait *w = rb_entry(rb, typeof(*w), node);
strcpy(waiter->comm, w->tsk->comm);
waiter->pid = w->tsk->pid;
waiter->seqno = w->seqno;
waiter++;
if (++ee->num_waiters == count)
break;
}
spin_unlock_irq(&b->rb_lock);
}
static void error_record_engine_registers(struct i915_gpu_state *error, static void error_record_engine_registers(struct i915_gpu_state *error,
struct intel_engine_cs *engine, struct intel_engine_cs *engine,
struct drm_i915_error_engine *ee) struct drm_i915_error_engine *ee)
@ -1296,7 +1225,6 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
intel_engine_get_instdone(engine, &ee->instdone); intel_engine_get_instdone(engine, &ee->instdone);
ee->waiting = intel_engine_has_waiter(engine);
ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base));
ee->acthd = intel_engine_get_active_head(engine); ee->acthd = intel_engine_get_active_head(engine);
ee->seqno = intel_engine_get_seqno(engine); ee->seqno = intel_engine_get_seqno(engine);
@ -1338,9 +1266,8 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
} }
ee->idle = intel_engine_is_idle(engine); ee->idle = intel_engine_is_idle(engine);
if (!ee->idle)
ee->hangcheck_timestamp = engine->hangcheck.action_timestamp; ee->hangcheck_timestamp = engine->hangcheck.action_timestamp;
ee->hangcheck_action = engine->hangcheck.action;
ee->hangcheck_stalled = engine->hangcheck.stalled;
ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error, ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error,
engine); engine);
@ -1371,6 +1298,7 @@ static void record_request(struct i915_request *request,
{ {
struct i915_gem_context *ctx = request->gem_context; struct i915_gem_context *ctx = request->gem_context;
erq->flags = request->fence.flags;
erq->context = ctx->hw_id; erq->context = ctx->hw_id;
erq->sched_attr = request->sched.attr; erq->sched_attr = request->sched.attr;
erq->ban_score = atomic_read(&ctx->ban_score); erq->ban_score = atomic_read(&ctx->ban_score);
@ -1546,7 +1474,6 @@ static void gem_record_rings(struct i915_gpu_state *error)
ee->engine_id = i; ee->engine_id = i;
error_record_engine_registers(error, engine, ee); error_record_engine_registers(error, engine, ee);
error_record_engine_waiters(engine, ee);
error_record_engine_execlists(engine, ee); error_record_engine_execlists(engine, ee);
request = i915_gem_find_active_request(engine); request = i915_gem_find_active_request(engine);
@ -1610,14 +1537,17 @@ static void gem_capture_vm(struct i915_gpu_state *error,
int count; int count;
count = 0; count = 0;
list_for_each_entry(vma, &vm->active_list, vm_link) list_for_each_entry(vma, &vm->bound_list, vm_link)
if (i915_vma_is_active(vma))
count++; count++;
active_bo = NULL; active_bo = NULL;
if (count) if (count)
active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC); active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC);
if (active_bo) if (active_bo)
count = capture_error_bo(active_bo, count, &vm->active_list, false); count = capture_error_bo(active_bo,
count, &vm->bound_list,
ACTIVE_ONLY);
else else
count = 0; count = 0;
@ -1655,28 +1585,20 @@ static void capture_pinned_buffers(struct i915_gpu_state *error)
struct i915_address_space *vm = &error->i915->ggtt.vm; struct i915_address_space *vm = &error->i915->ggtt.vm;
struct drm_i915_error_buffer *bo; struct drm_i915_error_buffer *bo;
struct i915_vma *vma; struct i915_vma *vma;
int count_inactive, count_active; int count;
count_inactive = 0; count = 0;
list_for_each_entry(vma, &vm->inactive_list, vm_link) list_for_each_entry(vma, &vm->bound_list, vm_link)
count_inactive++; count++;
count_active = 0;
list_for_each_entry(vma, &vm->active_list, vm_link)
count_active++;
bo = NULL; bo = NULL;
if (count_inactive + count_active) if (count)
bo = kcalloc(count_inactive + count_active, bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC);
sizeof(*bo), GFP_ATOMIC);
if (!bo) if (!bo)
return; return;
count_inactive = capture_error_bo(bo, count_inactive, error->pinned_bo_count =
&vm->active_list, true); capture_error_bo(bo, count, &vm->bound_list, PINNED_ONLY);
count_active = capture_error_bo(bo + count_inactive, count_active,
&vm->inactive_list, true);
error->pinned_bo_count = count_inactive + count_active;
error->pinned_bo = bo; error->pinned_bo = bo;
} }
@ -1783,31 +1705,35 @@ static void capture_reg_state(struct i915_gpu_state *error)
error->pgtbl_er = I915_READ(PGTBL_ER); error->pgtbl_er = I915_READ(PGTBL_ER);
} }
static void i915_error_capture_msg(struct drm_i915_private *dev_priv, static const char *
struct i915_gpu_state *error, error_msg(struct i915_gpu_state *error, unsigned long engines, const char *msg)
u32 engine_mask,
const char *error_msg)
{ {
u32 ecode; int len;
int engine_id = -1, len; int i;
ecode = i915_error_generate_code(dev_priv, error, &engine_id); for (i = 0; i < ARRAY_SIZE(error->engine); i++)
if (!error->engine[i].context.pid)
engines &= ~BIT(i);
len = scnprintf(error->error_msg, sizeof(error->error_msg), len = scnprintf(error->error_msg, sizeof(error->error_msg),
"GPU HANG: ecode %d:%d:0x%08x", "GPU HANG: ecode %d:%lx:0x%08x",
INTEL_GEN(dev_priv), engine_id, ecode); INTEL_GEN(error->i915), engines,
i915_error_generate_code(error, engines));
if (engine_id != -1 && error->engine[engine_id].context.pid) if (engines) {
/* Just show the first executing process, more is confusing */
i = ffs(engines);
len += scnprintf(error->error_msg + len, len += scnprintf(error->error_msg + len,
sizeof(error->error_msg) - len, sizeof(error->error_msg) - len,
", in %s [%d]", ", in %s [%d]",
error->engine[engine_id].context.comm, error->engine[i].context.comm,
error->engine[engine_id].context.pid); error->engine[i].context.pid);
}
if (msg)
len += scnprintf(error->error_msg + len,
sizeof(error->error_msg) - len,
", %s", msg);
scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, return error->error_msg;
", reason: %s, action: %s",
error_msg,
engine_mask ? "reset" : "continue");
} }
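
error_msg() now assembles the hang header incrementally into a fixed buffer, clamping each append to the space that remains. A hedged user-space equivalent of that bounded-append pattern; scnprintf() is kernel-only, so snprintf() plus a length check stands in, and every value below is invented:

```c
#include <stdio.h>

int main(void)
{
	char msg[64];
	size_t len;
	unsigned long engines = 0x4;		/* pretend engine 2 hung */

	/* Fixed header first; optional detail is appended into what is left. */
	len = (size_t)snprintf(msg, sizeof(msg),
			       "GPU HANG: ecode %d:%lx:0x%08x",
			       11, engines, 0x1234abcdu);
	if (len < sizeof(msg) && engines)
		snprintf(msg + len, sizeof(msg) - len, ", in %s [%d]",
			 "some-client", 4242);

	puts(msg);
	return 0;
}
```
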
static void capture_gen_state(struct i915_gpu_state *error) static void capture_gen_state(struct i915_gpu_state *error)
@ -1847,7 +1773,7 @@ static unsigned long capture_find_epoch(const struct i915_gpu_state *error)
for (i = 0; i < ARRAY_SIZE(error->engine); i++) { for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
const struct drm_i915_error_engine *ee = &error->engine[i]; const struct drm_i915_error_engine *ee = &error->engine[i];
if (ee->hangcheck_stalled && if (ee->hangcheck_timestamp &&
time_before(ee->hangcheck_timestamp, epoch)) time_before(ee->hangcheck_timestamp, epoch))
epoch = ee->hangcheck_timestamp; epoch = ee->hangcheck_timestamp;
} }
@ -1921,7 +1847,7 @@ i915_capture_gpu_state(struct drm_i915_private *i915)
* i915_capture_error_state - capture an error record for later analysis * i915_capture_error_state - capture an error record for later analysis
* @i915: i915 device * @i915: i915 device
* @engine_mask: the mask of engines triggering the hang * @engine_mask: the mask of engines triggering the hang
* @error_msg: a message to insert into the error capture header * @msg: a message to insert into the error capture header
* *
* Should be called when an error is detected (either a hang or an error * Should be called when an error is detected (either a hang or an error
* interrupt) to capture error state from the time of the error. Fills * interrupt) to capture error state from the time of the error. Fills
@ -1929,8 +1855,8 @@ i915_capture_gpu_state(struct drm_i915_private *i915)
* to pick up. * to pick up.
*/ */
void i915_capture_error_state(struct drm_i915_private *i915, void i915_capture_error_state(struct drm_i915_private *i915,
u32 engine_mask, unsigned long engine_mask,
const char *error_msg) const char *msg)
{ {
static bool warned; static bool warned;
struct i915_gpu_state *error; struct i915_gpu_state *error;
@ -1946,8 +1872,7 @@ void i915_capture_error_state(struct drm_i915_private *i915,
if (IS_ERR(error)) if (IS_ERR(error))
return; return;
i915_error_capture_msg(i915, error, engine_mask, error_msg); dev_info(i915->drm.dev, "%s\n", error_msg(error, engine_mask, msg));
DRM_INFO("%s\n", error->error_msg);
if (!error->simulated) { if (!error->simulated) {
spin_lock_irqsave(&i915->gpu_error.lock, flags); spin_lock_irqsave(&i915->gpu_error.lock, flags);

View file

@ -82,11 +82,7 @@ struct i915_gpu_state {
int engine_id; int engine_id;
/* Software tracked state */ /* Software tracked state */
bool idle; bool idle;
bool waiting;
int num_waiters;
unsigned long hangcheck_timestamp; unsigned long hangcheck_timestamp;
bool hangcheck_stalled;
enum intel_engine_hangcheck_action hangcheck_action;
struct i915_address_space *vm; struct i915_address_space *vm;
int num_requests; int num_requests;
u32 reset_count; u32 reset_count;
@ -149,6 +145,7 @@ struct i915_gpu_state {
struct drm_i915_error_object *default_state; struct drm_i915_error_object *default_state;
struct drm_i915_error_request { struct drm_i915_error_request {
unsigned long flags;
long jiffies; long jiffies;
pid_t pid; pid_t pid;
u32 context; u32 context;
@ -161,12 +158,6 @@ struct i915_gpu_state {
} *requests, execlist[EXECLIST_MAX_PORTS]; } *requests, execlist[EXECLIST_MAX_PORTS];
unsigned int num_ports; unsigned int num_ports;
struct drm_i915_error_waiter {
char comm[TASK_COMM_LEN];
pid_t pid;
u32 seqno;
} *waiters;
struct { struct {
u32 gfx_mode; u32 gfx_mode;
union { union {
@ -197,6 +188,8 @@ struct i915_gpu_state {
struct scatterlist *sgl, *fit; struct scatterlist *sgl, *fit;
}; };
struct i915_gpu_restart;
struct i915_gpu_error { struct i915_gpu_error {
/* For hangcheck timer */ /* For hangcheck timer */
#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
@ -211,8 +204,6 @@ struct i915_gpu_error {
atomic_t pending_fb_pin; atomic_t pending_fb_pin;
unsigned long missed_irq_rings;
/** /**
* State variable controlling the reset flow and count * State variable controlling the reset flow and count
* *
@ -247,15 +238,6 @@ struct i915_gpu_error {
* i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a * i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a
* secondary role in preventing two concurrent global reset attempts. * secondary role in preventing two concurrent global reset attempts.
* *
* #I915_RESET_HANDOFF - To perform the actual GPU reset, we need the
* struct_mutex. We try to acquire the struct_mutex in the reset worker,
* but it may be held by some long running waiter (that we cannot
* interrupt without causing trouble). Once we are ready to do the GPU
* reset, we set the I915_RESET_HANDOFF bit and wakeup any waiters. If
* they already hold the struct_mutex and want to participate they can
* inspect the bit and do the reset directly, otherwise the worker
* waits for the struct_mutex.
*
* #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to
* acquire the struct_mutex to reset an engine, we need an explicit * acquire the struct_mutex to reset an engine, we need an explicit
* flag to prevent two concurrent reset attempts in the same engine. * flag to prevent two concurrent reset attempts in the same engine.
@ -269,20 +251,13 @@ struct i915_gpu_error {
*/ */
unsigned long flags; unsigned long flags;
#define I915_RESET_BACKOFF 0 #define I915_RESET_BACKOFF 0
#define I915_RESET_HANDOFF 1 #define I915_RESET_MODESET 1
#define I915_RESET_MODESET 2 #define I915_RESET_ENGINE 2
#define I915_RESET_ENGINE 3
#define I915_WEDGED (BITS_PER_LONG - 1) #define I915_WEDGED (BITS_PER_LONG - 1)
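
The retained comment above explains that I915_RESET_ENGINE needs an explicit flag per engine so that two concurrent reset attempts on the same engine cannot race. A rough user-space illustration of that claim/release idea with a C11 atomic word (made-up names; in the driver this is what test_and_set_bit()-style operations on the flags word achieve):

```c
#include <stdatomic.h>
#include <stdio.h>

#define RESET_ENGINE_BASE 2	/* mirrors the I915_RESET_ENGINE bit above */

static atomic_ulong error_flags;

/* Returns nonzero only for the caller that flipped the engine's bit 0 -> 1. */
static int try_claim_engine_reset(int engine_id)
{
	unsigned long bit = 1ul << (RESET_ENGINE_BASE + engine_id);

	return !(atomic_fetch_or(&error_flags, bit) & bit);
}

static void release_engine_reset(int engine_id)
{
	atomic_fetch_and(&error_flags,
			 ~(1ul << (RESET_ENGINE_BASE + engine_id)));
}

int main(void)
{
	printf("first claim:   %d\n", try_claim_engine_reset(0));	/* 1 */
	printf("second claim:  %d\n", try_claim_engine_reset(0));	/* 0 */
	release_engine_reset(0);
	printf("after release: %d\n", try_claim_engine_reset(0));	/* 1 */
	return 0;
}
```
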
/** Number of times an engine has been reset */ /** Number of times an engine has been reset */
u32 reset_engine_count[I915_NUM_ENGINES]; u32 reset_engine_count[I915_NUM_ENGINES];
/** Set of stalled engines with guilty requests, in the current reset */
u32 stalled_mask;
/** Reason for the current *global* reset */
const char *reason;
struct mutex wedge_mutex; /* serialises wedging/unwedging */ struct mutex wedge_mutex; /* serialises wedging/unwedging */
/** /**
@ -297,8 +272,7 @@ struct i915_gpu_error {
*/ */
wait_queue_head_t reset_queue; wait_queue_head_t reset_queue;
/* For missed irq/seqno simulation. */ struct i915_gpu_restart *restart;
unsigned long test_irq_rings;
}; };
struct drm_i915_error_state_buf { struct drm_i915_error_state_buf {
@ -320,7 +294,7 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915); struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915);
void i915_capture_error_state(struct drm_i915_private *dev_priv, void i915_capture_error_state(struct drm_i915_private *dev_priv,
u32 engine_mask, unsigned long engine_mask,
const char *error_msg); const char *error_msg);
static inline struct i915_gpu_state * static inline struct i915_gpu_state *

View file

@ -823,11 +823,26 @@ static void i915_enable_asle_pipestat(struct drm_i915_private *dev_priv)
static u32 i915_get_vblank_counter(struct drm_device *dev, unsigned int pipe) static u32 i915_get_vblank_counter(struct drm_device *dev, unsigned int pipe)
{ {
struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_vblank_crtc *vblank = &dev->vblank[pipe];
const struct drm_display_mode *mode = &vblank->hwmode;
i915_reg_t high_frame, low_frame; i915_reg_t high_frame, low_frame;
u32 high1, high2, low, pixel, vbl_start, hsync_start, htotal; u32 high1, high2, low, pixel, vbl_start, hsync_start, htotal;
const struct drm_display_mode *mode = &dev->vblank[pipe].hwmode;
unsigned long irqflags; unsigned long irqflags;
/*
* On i965gm TV output the frame counter only works up to
* the point when we enable the TV encoder. After that the
* frame counter ceases to work and reads zero. We need a
* vblank wait before enabling the TV encoder and so we
* have to enable vblank interrupts while the frame counter
* is still in a working state. However the core vblank code
* does not like us returning non-zero frame counter values
* when we've told it that we don't have a working frame
* counter. Thus we must stop non-zero values leaking out.
*/
if (!vblank->max_vblank_count)
return 0;
htotal = mode->crtc_htotal; htotal = mode->crtc_htotal;
hsync_start = mode->crtc_hsync_start; hsync_start = mode->crtc_hsync_start;
vbl_start = mode->crtc_vblank_start; vbl_start = mode->crtc_vblank_start;
@ -999,6 +1014,9 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
int position; int position;
int vbl_start, vbl_end, hsync_start, htotal, vtotal; int vbl_start, vbl_end, hsync_start, htotal, vtotal;
unsigned long irqflags; unsigned long irqflags;
bool use_scanline_counter = INTEL_GEN(dev_priv) >= 5 ||
IS_G4X(dev_priv) || IS_GEN(dev_priv, 2) ||
mode->private_flags & I915_MODE_FLAG_USE_SCANLINE_COUNTER;
if (WARN_ON(!mode->crtc_clock)) { if (WARN_ON(!mode->crtc_clock)) {
DRM_DEBUG_DRIVER("trying to get scanoutpos for disabled " DRM_DEBUG_DRIVER("trying to get scanoutpos for disabled "
@ -1031,7 +1049,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
if (stime) if (stime)
*stime = ktime_get(); *stime = ktime_get();
if (IS_GEN(dev_priv, 2) || IS_G4X(dev_priv) || INTEL_GEN(dev_priv) >= 5) { if (use_scanline_counter) {
/* No obvious pixelcount register. Only query vertical /* No obvious pixelcount register. Only query vertical
* scanout position from Display scan line register. * scanout position from Display scan line register.
*/ */
@ -1091,7 +1109,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
else else
position += vtotal - vbl_end; position += vtotal - vbl_end;
if (IS_GEN(dev_priv, 2) || IS_G4X(dev_priv) || INTEL_GEN(dev_priv) >= 5) { if (use_scanline_counter) {
*vpos = position; *vpos = position;
*hpos = 0; *hpos = 0;
} else { } else {
@ -1153,68 +1171,6 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
return; return;
} }
static void notify_ring(struct intel_engine_cs *engine)
{
const u32 seqno = intel_engine_get_seqno(engine);
struct i915_request *rq = NULL;
struct task_struct *tsk = NULL;
struct intel_wait *wait;
if (unlikely(!engine->breadcrumbs.irq_armed))
return;
rcu_read_lock();
spin_lock(&engine->breadcrumbs.irq_lock);
wait = engine->breadcrumbs.irq_wait;
if (wait) {
/*
* We use a callback from the dma-fence to submit
* requests after waiting on our own requests. To
* ensure minimum delay in queuing the next request to
* hardware, signal the fence now rather than wait for
* the signaler to be woken up. We still wake up the
* waiter in order to handle the irq-seqno coherency
* issues (we may receive the interrupt before the
* seqno is written, see __i915_request_irq_complete())
* and to handle coalescing of multiple seqno updates
* and many waiters.
*/
if (i915_seqno_passed(seqno, wait->seqno)) {
struct i915_request *waiter = wait->request;
if (waiter &&
!i915_request_signaled(waiter) &&
intel_wait_check_request(wait, waiter))
rq = i915_request_get(waiter);
tsk = wait->tsk;
}
engine->breadcrumbs.irq_count++;
} else {
if (engine->breadcrumbs.irq_armed)
__intel_engine_disarm_breadcrumbs(engine);
}
spin_unlock(&engine->breadcrumbs.irq_lock);
if (rq) {
spin_lock(&rq->lock);
dma_fence_signal_locked(&rq->fence);
GEM_BUG_ON(!i915_request_completed(rq));
spin_unlock(&rq->lock);
i915_request_put(rq);
}
if (tsk && tsk->state & TASK_NORMAL)
wake_up_process(tsk);
rcu_read_unlock();
trace_intel_engine_notify(engine, wait);
}
static void vlv_c0_read(struct drm_i915_private *dev_priv, static void vlv_c0_read(struct drm_i915_private *dev_priv,
struct intel_rps_ei *ei) struct intel_rps_ei *ei)
{ {
@ -1459,20 +1415,20 @@ static void ilk_gt_irq_handler(struct drm_i915_private *dev_priv,
u32 gt_iir) u32 gt_iir)
{ {
if (gt_iir & GT_RENDER_USER_INTERRUPT) if (gt_iir & GT_RENDER_USER_INTERRUPT)
notify_ring(dev_priv->engine[RCS]); intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);
if (gt_iir & ILK_BSD_USER_INTERRUPT) if (gt_iir & ILK_BSD_USER_INTERRUPT)
notify_ring(dev_priv->engine[VCS]); intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]);
} }
static void snb_gt_irq_handler(struct drm_i915_private *dev_priv, static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
u32 gt_iir) u32 gt_iir)
{ {
if (gt_iir & GT_RENDER_USER_INTERRUPT) if (gt_iir & GT_RENDER_USER_INTERRUPT)
notify_ring(dev_priv->engine[RCS]); intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);
if (gt_iir & GT_BSD_USER_INTERRUPT) if (gt_iir & GT_BSD_USER_INTERRUPT)
notify_ring(dev_priv->engine[VCS]); intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]);
if (gt_iir & GT_BLT_USER_INTERRUPT) if (gt_iir & GT_BLT_USER_INTERRUPT)
notify_ring(dev_priv->engine[BCS]); intel_engine_breadcrumbs_irq(dev_priv->engine[BCS]);
if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT | if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |
GT_BSD_CS_ERROR_INTERRUPT | GT_BSD_CS_ERROR_INTERRUPT |
@ -1492,7 +1448,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
tasklet = true; tasklet = true;
if (iir & GT_RENDER_USER_INTERRUPT) { if (iir & GT_RENDER_USER_INTERRUPT) {
notify_ring(engine); intel_engine_breadcrumbs_irq(engine);
tasklet |= USES_GUC_SUBMISSION(engine->i915); tasklet |= USES_GUC_SUBMISSION(engine->i915);
} }
@ -1838,7 +1794,7 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
if (HAS_VEBOX(dev_priv)) { if (HAS_VEBOX(dev_priv)) {
if (pm_iir & PM_VEBOX_USER_INTERRUPT) if (pm_iir & PM_VEBOX_USER_INTERRUPT)
notify_ring(dev_priv->engine[VECS]); intel_engine_breadcrumbs_irq(dev_priv->engine[VECS]);
if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
@ -4262,7 +4218,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)
I915_WRITE16(IIR, iir); I915_WRITE16(IIR, iir);
if (iir & I915_USER_INTERRUPT) if (iir & I915_USER_INTERRUPT)
notify_ring(dev_priv->engine[RCS]); intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);
if (iir & I915_MASTER_ERROR_INTERRUPT) if (iir & I915_MASTER_ERROR_INTERRUPT)
i8xx_error_irq_handler(dev_priv, eir, eir_stuck); i8xx_error_irq_handler(dev_priv, eir, eir_stuck);
@ -4370,7 +4326,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)
I915_WRITE(IIR, iir); I915_WRITE(IIR, iir);
if (iir & I915_USER_INTERRUPT) if (iir & I915_USER_INTERRUPT)
notify_ring(dev_priv->engine[RCS]); intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);
if (iir & I915_MASTER_ERROR_INTERRUPT) if (iir & I915_MASTER_ERROR_INTERRUPT)
i9xx_error_irq_handler(dev_priv, eir, eir_stuck); i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
@ -4515,10 +4471,10 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
I915_WRITE(IIR, iir); I915_WRITE(IIR, iir);
if (iir & I915_USER_INTERRUPT) if (iir & I915_USER_INTERRUPT)
notify_ring(dev_priv->engine[RCS]); intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);
if (iir & I915_BSD_USER_INTERRUPT) if (iir & I915_BSD_USER_INTERRUPT)
notify_ring(dev_priv->engine[VCS]); intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]);
if (iir & I915_MASTER_ERROR_INTERRUPT) if (iir & I915_MASTER_ERROR_INTERRUPT)
i9xx_error_irq_handler(dev_priv, eir, eir_stuck); i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
@ -4581,16 +4537,10 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
if (INTEL_GEN(dev_priv) >= 8) if (INTEL_GEN(dev_priv) >= 8)
rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
if (IS_GEN(dev_priv, 2)) { if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv))
/* Gen2 doesn't have a hardware frame counter */
dev->max_vblank_count = 0;
} else if (IS_G4X(dev_priv) || INTEL_GEN(dev_priv) >= 5) {
dev->max_vblank_count = 0xffffffff; /* full 32 bit counter */
dev->driver->get_vblank_counter = g4x_get_vblank_counter; dev->driver->get_vblank_counter = g4x_get_vblank_counter;
} else { else if (INTEL_GEN(dev_priv) >= 3)
dev->driver->get_vblank_counter = i915_get_vblank_counter; dev->driver->get_vblank_counter = i915_get_vblank_counter;
dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */
}
/* /*
* Opt out of the vblank disable timer on everything except gen2. * Opt out of the vblank disable timer on everything except gen2.

View file

@ -97,8 +97,10 @@ i915_param_named_unsafe(disable_power_well, int, 0400,
i915_param_named_unsafe(enable_ips, int, 0600, "Enable IPS (default: true)"); i915_param_named_unsafe(enable_ips, int, 0600, "Enable IPS (default: true)");
i915_param_named(fastboot, bool, 0600, i915_param_named(fastboot, int, 0600,
"Try to skip unnecessary mode sets at boot time (default: false)"); "Try to skip unnecessary mode sets at boot time "
"(0=disabled, 1=enabled) "
"Default: -1 (use per-chip default)");
i915_param_named_unsafe(prefault_disable, bool, 0600, i915_param_named_unsafe(prefault_disable, bool, 0600,
"Disable page prefaulting for pread/pwrite/reloc (default:false). " "Disable page prefaulting for pread/pwrite/reloc (default:false). "

View file

@ -63,10 +63,10 @@ struct drm_printer;
param(int, edp_vswing, 0) \ param(int, edp_vswing, 0) \
param(int, reset, 2) \ param(int, reset, 2) \
param(unsigned int, inject_load_failure, 0) \ param(unsigned int, inject_load_failure, 0) \
param(int, fastboot, -1) \
/* leave bools at the end to not create holes */ \ /* leave bools at the end to not create holes */ \
param(bool, alpha_support, IS_ENABLED(CONFIG_DRM_I915_ALPHA_SUPPORT)) \ param(bool, alpha_support, IS_ENABLED(CONFIG_DRM_I915_ALPHA_SUPPORT)) \
param(bool, enable_hangcheck, true) \ param(bool, enable_hangcheck, true) \
param(bool, fastboot, false) \
param(bool, prefault_disable, false) \ param(bool, prefault_disable, false) \
param(bool, load_detect_test, false) \ param(bool, load_detect_test, false) \
param(bool, force_reset_modeset_test, false) \ param(bool, force_reset_modeset_test, false) \
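
The fastboot entry above now defaults to -1, with the module-parameter help a few hunks earlier spelling out 0=disabled, 1=enabled and -1 meaning "use the per-chip default". A hypothetical sketch of how such a tri-state parameter is typically resolved (the driver's actual decision point is not shown in this diff):

```c
#include <stdbool.h>
#include <stdio.h>

/* -1 = not set by the user, 0 = force off, 1 = force on. */
static bool resolve_fastboot(int param, bool chip_default)
{
	if (param >= 0)
		return param != 0;	/* explicit user choice wins */
	return chip_default;		/* otherwise use the per-chip default */
}

int main(void)
{
	printf("%d %d %d\n",
	       resolve_fastboot(-1, true),	/* 1: chip default */
	       resolve_fastboot(0, true),	/* 0: user forced off */
	       resolve_fastboot(1, false));	/* 1: user forced on */
	return 0;
}
```
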

View file

@ -69,9 +69,15 @@
#define BDW_COLORS \ #define BDW_COLORS \
.color = { .degamma_lut_size = 512, .gamma_lut_size = 512 } .color = { .degamma_lut_size = 512, .gamma_lut_size = 512 }
#define CHV_COLORS \ #define CHV_COLORS \
.color = { .degamma_lut_size = 65, .gamma_lut_size = 257 } .color = { .degamma_lut_size = 65, .gamma_lut_size = 257, \
.degamma_lut_tests = DRM_COLOR_LUT_NON_DECREASING, \
.gamma_lut_tests = DRM_COLOR_LUT_NON_DECREASING, \
}
#define GLK_COLORS \ #define GLK_COLORS \
.color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024, \
.degamma_lut_tests = DRM_COLOR_LUT_NON_DECREASING | \
DRM_COLOR_LUT_EQUAL_CHANNELS, \
}
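
CHV now asks the core to verify that its degamma/gamma tables never step downwards, and GLK additionally requires equal R/G/B channels for its degamma table. A stand-alone sketch of what those two properties amount to, using a local struct rather than the DRM headers:

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors the shape of a color LUT entry: 16-bit R/G/B per step. */
struct lut_entry {
	uint16_t red, green, blue;
};

/* "Equal channels": every entry must have R == G == B. */
static bool lut_equal_channels(const struct lut_entry *lut, size_t len)
{
	for (size_t i = 0; i < len; i++)
		if (lut[i].red != lut[i].green || lut[i].green != lut[i].blue)
			return false;
	return true;
}

/* "Non-decreasing": no channel may ever step downwards between entries. */
static bool lut_non_decreasing(const struct lut_entry *lut, size_t len)
{
	for (size_t i = 1; i < len; i++)
		if (lut[i].red   < lut[i - 1].red ||
		    lut[i].green < lut[i - 1].green ||
		    lut[i].blue  < lut[i - 1].blue)
			return false;
	return true;
}

int main(void)
{
	const struct lut_entry ramp[] = {
		{ 0, 0, 0 }, { 32768, 32768, 32768 }, { 65535, 65535, 65535 },
	};

	printf("equal channels: %d, non-decreasing: %d\n",
	       lut_equal_channels(ramp, 3), lut_non_decreasing(ramp, 3));
	return 0;
}
```
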
/* Keep in gen based order, and chronological order within a gen */ /* Keep in gen based order, and chronological order within a gen */
@ -707,6 +713,7 @@ static const struct pci_device_id pciidlist[] = {
INTEL_AML_KBL_GT2_IDS(&intel_kabylake_gt2_info), INTEL_AML_KBL_GT2_IDS(&intel_kabylake_gt2_info),
INTEL_CFL_S_GT1_IDS(&intel_coffeelake_gt1_info), INTEL_CFL_S_GT1_IDS(&intel_coffeelake_gt1_info),
INTEL_CFL_S_GT2_IDS(&intel_coffeelake_gt2_info), INTEL_CFL_S_GT2_IDS(&intel_coffeelake_gt2_info),
INTEL_CFL_H_GT1_IDS(&intel_coffeelake_gt1_info),
INTEL_CFL_H_GT2_IDS(&intel_coffeelake_gt2_info), INTEL_CFL_H_GT2_IDS(&intel_coffeelake_gt2_info),
INTEL_CFL_U_GT2_IDS(&intel_coffeelake_gt2_info), INTEL_CFL_U_GT2_IDS(&intel_coffeelake_gt2_info),
INTEL_CFL_U_GT3_IDS(&intel_coffeelake_gt3_info), INTEL_CFL_U_GT3_IDS(&intel_coffeelake_gt3_info),

View file

@ -2801,6 +2801,9 @@ enum i915_power_well_id {
#define GEN6_RCS_PWR_FSM _MMIO(0x22ac) #define GEN6_RCS_PWR_FSM _MMIO(0x22ac)
#define GEN9_RCS_FE_FSM2 _MMIO(0x22a4) #define GEN9_RCS_FE_FSM2 _MMIO(0x22a4)
#define GEN10_CACHE_MODE_SS _MMIO(0xe420)
#define FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
/* Fuse readout registers for GT */ /* Fuse readout registers for GT */
#define HSW_PAVP_FUSE1 _MMIO(0x911C) #define HSW_PAVP_FUSE1 _MMIO(0x911C)
#define HSW_F1_EU_DIS_SHIFT 16 #define HSW_F1_EU_DIS_SHIFT 16
@ -4895,6 +4898,7 @@ enum {
# define TV_OVERSAMPLE_NONE (2 << 18) # define TV_OVERSAMPLE_NONE (2 << 18)
/* Selects 8x oversampling */ /* Selects 8x oversampling */
# define TV_OVERSAMPLE_8X (3 << 18) # define TV_OVERSAMPLE_8X (3 << 18)
# define TV_OVERSAMPLE_MASK (3 << 18)
/* Selects progressive mode rather than interlaced */ /* Selects progressive mode rather than interlaced */
# define TV_PROGRESSIVE (1 << 17) # define TV_PROGRESSIVE (1 << 17)
/* Sets the colorburst to PAL mode. Required for non-M PAL modes. */ /* Sets the colorburst to PAL mode. Required for non-M PAL modes. */
@ -5709,6 +5713,12 @@ enum {
#define PIPEMISC_DITHER_TYPE_SP (0 << 2) #define PIPEMISC_DITHER_TYPE_SP (0 << 2)
#define PIPEMISC(pipe) _MMIO_PIPE2(pipe, _PIPE_MISC_A) #define PIPEMISC(pipe) _MMIO_PIPE2(pipe, _PIPE_MISC_A)
/* Skylake+ pipe bottom (background) color */
#define _SKL_BOTTOM_COLOR_A 0x70034
#define SKL_BOTTOM_COLOR_GAMMA_ENABLE (1 << 31)
#define SKL_BOTTOM_COLOR_CSC_ENABLE (1 << 30)
#define SKL_BOTTOM_COLOR(pipe) _MMIO_PIPE2(pipe, _SKL_BOTTOM_COLOR_A)
#define VLV_DPFLIPSTAT _MMIO(VLV_DISPLAY_BASE + 0x70028) #define VLV_DPFLIPSTAT _MMIO(VLV_DISPLAY_BASE + 0x70028)
#define PIPEB_LINE_COMPARE_INT_EN (1 << 29) #define PIPEB_LINE_COMPARE_INT_EN (1 << 29)
#define PIPEB_HLINE_INT_EN (1 << 28) #define PIPEB_HLINE_INT_EN (1 << 28)
@ -9553,7 +9563,7 @@ enum skl_power_gate {
#define _MG_PLL3_ENABLE 0x46038 #define _MG_PLL3_ENABLE 0x46038
#define _MG_PLL4_ENABLE 0x4603C #define _MG_PLL4_ENABLE 0x4603C
/* Bits are the same as DPLL0_ENABLE */ /* Bits are the same as DPLL0_ENABLE */
#define MG_PLL_ENABLE(port) _MMIO_PORT((port) - PORT_C, _MG_PLL1_ENABLE, \ #define MG_PLL_ENABLE(tc_port) _MMIO_PORT((tc_port), _MG_PLL1_ENABLE, \
_MG_PLL2_ENABLE) _MG_PLL2_ENABLE)
#define _MG_REFCLKIN_CTL_PORT1 0x16892C #define _MG_REFCLKIN_CTL_PORT1 0x16892C
@ -9562,7 +9572,7 @@ enum skl_power_gate {
#define _MG_REFCLKIN_CTL_PORT4 0x16B92C #define _MG_REFCLKIN_CTL_PORT4 0x16B92C
#define MG_REFCLKIN_CTL_OD_2_MUX(x) ((x) << 8) #define MG_REFCLKIN_CTL_OD_2_MUX(x) ((x) << 8)
#define MG_REFCLKIN_CTL_OD_2_MUX_MASK (0x7 << 8) #define MG_REFCLKIN_CTL_OD_2_MUX_MASK (0x7 << 8)
#define MG_REFCLKIN_CTL(port) _MMIO_PORT((port) - PORT_C, \ #define MG_REFCLKIN_CTL(tc_port) _MMIO_PORT((tc_port), \
_MG_REFCLKIN_CTL_PORT1, \ _MG_REFCLKIN_CTL_PORT1, \
_MG_REFCLKIN_CTL_PORT2) _MG_REFCLKIN_CTL_PORT2)
@ -9574,7 +9584,7 @@ enum skl_power_gate {
#define MG_CLKTOP2_CORECLKCTL1_B_DIVRATIO_MASK (0xff << 16) #define MG_CLKTOP2_CORECLKCTL1_B_DIVRATIO_MASK (0xff << 16)
#define MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO(x) ((x) << 8) #define MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO(x) ((x) << 8)
#define MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK (0xff << 8) #define MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK (0xff << 8)
#define MG_CLKTOP2_CORECLKCTL1(port) _MMIO_PORT((port) - PORT_C, \ #define MG_CLKTOP2_CORECLKCTL1(tc_port) _MMIO_PORT((tc_port), \
_MG_CLKTOP2_CORECLKCTL1_PORT1, \ _MG_CLKTOP2_CORECLKCTL1_PORT1, \
_MG_CLKTOP2_CORECLKCTL1_PORT2) _MG_CLKTOP2_CORECLKCTL1_PORT2)
@ -9594,7 +9604,7 @@ enum skl_power_gate {
#define MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO(x) ((x) << 8) #define MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO(x) ((x) << 8)
#define MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_SHIFT 8 #define MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_SHIFT 8
#define MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK (0xf << 8) #define MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK (0xf << 8)
#define MG_CLKTOP2_HSCLKCTL(port) _MMIO_PORT((port) - PORT_C, \ #define MG_CLKTOP2_HSCLKCTL(tc_port) _MMIO_PORT((tc_port), \
_MG_CLKTOP2_HSCLKCTL_PORT1, \ _MG_CLKTOP2_HSCLKCTL_PORT1, \
_MG_CLKTOP2_HSCLKCTL_PORT2) _MG_CLKTOP2_HSCLKCTL_PORT2)
@ -9608,7 +9618,7 @@ enum skl_power_gate {
#define MG_PLL_DIV0_FBDIV_FRAC(x) ((x) << 8) #define MG_PLL_DIV0_FBDIV_FRAC(x) ((x) << 8)
#define MG_PLL_DIV0_FBDIV_INT_MASK (0xff << 0) #define MG_PLL_DIV0_FBDIV_INT_MASK (0xff << 0)
#define MG_PLL_DIV0_FBDIV_INT(x) ((x) << 0) #define MG_PLL_DIV0_FBDIV_INT(x) ((x) << 0)
#define MG_PLL_DIV0(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_DIV0_PORT1, \ #define MG_PLL_DIV0(tc_port) _MMIO_PORT((tc_port), _MG_PLL_DIV0_PORT1, \
_MG_PLL_DIV0_PORT2) _MG_PLL_DIV0_PORT2)
#define _MG_PLL_DIV1_PORT1 0x168A04 #define _MG_PLL_DIV1_PORT1 0x168A04
@ -9623,7 +9633,7 @@ enum skl_power_gate {
#define MG_PLL_DIV1_NDIVRATIO(x) ((x) << 4) #define MG_PLL_DIV1_NDIVRATIO(x) ((x) << 4)
#define MG_PLL_DIV1_FBPREDIV_MASK (0xf << 0) #define MG_PLL_DIV1_FBPREDIV_MASK (0xf << 0)
#define MG_PLL_DIV1_FBPREDIV(x) ((x) << 0) #define MG_PLL_DIV1_FBPREDIV(x) ((x) << 0)
#define MG_PLL_DIV1(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_DIV1_PORT1, \ #define MG_PLL_DIV1(tc_port) _MMIO_PORT((tc_port), _MG_PLL_DIV1_PORT1, \
_MG_PLL_DIV1_PORT2) _MG_PLL_DIV1_PORT2)
#define _MG_PLL_LF_PORT1 0x168A08 #define _MG_PLL_LF_PORT1 0x168A08
@ -9636,7 +9646,7 @@ enum skl_power_gate {
#define MG_PLL_LF_GAINCTRL(x) ((x) << 16) #define MG_PLL_LF_GAINCTRL(x) ((x) << 16)
#define MG_PLL_LF_INT_COEFF(x) ((x) << 8) #define MG_PLL_LF_INT_COEFF(x) ((x) << 8)
#define MG_PLL_LF_PROP_COEFF(x) ((x) << 0) #define MG_PLL_LF_PROP_COEFF(x) ((x) << 0)
#define MG_PLL_LF(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_LF_PORT1, \ #define MG_PLL_LF(tc_port) _MMIO_PORT((tc_port), _MG_PLL_LF_PORT1, \
_MG_PLL_LF_PORT2) _MG_PLL_LF_PORT2)
#define _MG_PLL_FRAC_LOCK_PORT1 0x168A0C #define _MG_PLL_FRAC_LOCK_PORT1 0x168A0C
@ -9649,7 +9659,7 @@ enum skl_power_gate {
#define MG_PLL_FRAC_LOCK_DCODITHEREN (1 << 10) #define MG_PLL_FRAC_LOCK_DCODITHEREN (1 << 10)
#define MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN (1 << 8) #define MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN (1 << 8)
#define MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(x) ((x) << 0) #define MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(x) ((x) << 0)
#define MG_PLL_FRAC_LOCK(port) _MMIO_PORT((port) - PORT_C, \ #define MG_PLL_FRAC_LOCK(tc_port) _MMIO_PORT((tc_port), \
_MG_PLL_FRAC_LOCK_PORT1, \ _MG_PLL_FRAC_LOCK_PORT1, \
_MG_PLL_FRAC_LOCK_PORT2) _MG_PLL_FRAC_LOCK_PORT2)
@ -9663,7 +9673,7 @@ enum skl_power_gate {
#define MG_PLL_SSC_STEPNUM(x) ((x) << 10) #define MG_PLL_SSC_STEPNUM(x) ((x) << 10)
#define MG_PLL_SSC_FLLEN (1 << 9) #define MG_PLL_SSC_FLLEN (1 << 9)
#define MG_PLL_SSC_STEPSIZE(x) ((x) << 0) #define MG_PLL_SSC_STEPSIZE(x) ((x) << 0)
#define MG_PLL_SSC(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_SSC_PORT1, \ #define MG_PLL_SSC(tc_port) _MMIO_PORT((tc_port), _MG_PLL_SSC_PORT1, \
_MG_PLL_SSC_PORT2) _MG_PLL_SSC_PORT2)
#define _MG_PLL_BIAS_PORT1 0x168A14 #define _MG_PLL_BIAS_PORT1 0x168A14
@ -9683,7 +9693,7 @@ enum skl_power_gate {
#define MG_PLL_BIAS_VREF_RDAC_MASK (0x7 << 5) #define MG_PLL_BIAS_VREF_RDAC_MASK (0x7 << 5)
#define MG_PLL_BIAS_IREFTRIM(x) ((x) << 0) #define MG_PLL_BIAS_IREFTRIM(x) ((x) << 0)
#define MG_PLL_BIAS_IREFTRIM_MASK (0x1f << 0) #define MG_PLL_BIAS_IREFTRIM_MASK (0x1f << 0)
#define MG_PLL_BIAS(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_BIAS_PORT1, \ #define MG_PLL_BIAS(tc_port) _MMIO_PORT((tc_port), _MG_PLL_BIAS_PORT1, \
_MG_PLL_BIAS_PORT2) _MG_PLL_BIAS_PORT2)
#define _MG_PLL_TDC_COLDST_BIAS_PORT1 0x168A18 #define _MG_PLL_TDC_COLDST_BIAS_PORT1 0x168A18
@ -9695,7 +9705,7 @@ enum skl_power_gate {
#define MG_PLL_TDC_COLDST_COLDSTART (1 << 16) #define MG_PLL_TDC_COLDST_COLDSTART (1 << 16)
#define MG_PLL_TDC_TDCOVCCORR_EN (1 << 2) #define MG_PLL_TDC_TDCOVCCORR_EN (1 << 2)
#define MG_PLL_TDC_TDCSEL(x) ((x) << 0) #define MG_PLL_TDC_TDCSEL(x) ((x) << 0)
#define MG_PLL_TDC_COLDST_BIAS(port) _MMIO_PORT((port) - PORT_C, \ #define MG_PLL_TDC_COLDST_BIAS(tc_port) _MMIO_PORT((tc_port), \
_MG_PLL_TDC_COLDST_BIAS_PORT1, \ _MG_PLL_TDC_COLDST_BIAS_PORT1, \
_MG_PLL_TDC_COLDST_BIAS_PORT2) _MG_PLL_TDC_COLDST_BIAS_PORT2)

View file

@ -60,7 +60,7 @@ static bool i915_fence_signaled(struct dma_fence *fence)
static bool i915_fence_enable_signaling(struct dma_fence *fence) static bool i915_fence_enable_signaling(struct dma_fence *fence)
{ {
return intel_engine_enable_signaling(to_request(fence), true); return i915_request_enable_breadcrumb(to_request(fence));
} }
static signed long i915_fence_wait(struct dma_fence *fence, static signed long i915_fence_wait(struct dma_fence *fence,
@ -182,10 +182,11 @@ static void free_capture_list(struct i915_request *request)
static void __retire_engine_request(struct intel_engine_cs *engine, static void __retire_engine_request(struct intel_engine_cs *engine,
struct i915_request *rq) struct i915_request *rq)
{ {
GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d\n", GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d:%d\n",
__func__, engine->name, __func__, engine->name,
rq->fence.context, rq->fence.seqno, rq->fence.context, rq->fence.seqno,
rq->global_seqno, rq->global_seqno,
hwsp_seqno(rq),
intel_engine_get_seqno(engine)); intel_engine_get_seqno(engine));
GEM_BUG_ON(!i915_request_completed(rq)); GEM_BUG_ON(!i915_request_completed(rq));
@ -198,10 +199,11 @@ static void __retire_engine_request(struct intel_engine_cs *engine,
spin_unlock(&engine->timeline.lock); spin_unlock(&engine->timeline.lock);
spin_lock(&rq->lock); spin_lock(&rq->lock);
i915_request_mark_complete(rq);
if (!i915_request_signaled(rq)) if (!i915_request_signaled(rq))
dma_fence_signal_locked(&rq->fence); dma_fence_signal_locked(&rq->fence);
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
intel_engine_cancel_signaling(rq); i915_request_cancel_breadcrumb(rq);
if (rq->waitboost) { if (rq->waitboost) {
GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters)); GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
atomic_dec(&rq->i915->gt_pm.rps.num_waiters); atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
@ -244,10 +246,11 @@ static void i915_request_retire(struct i915_request *request)
{ {
struct i915_gem_active *active, *next; struct i915_gem_active *active, *next;
GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n",
request->engine->name, request->engine->name,
request->fence.context, request->fence.seqno, request->fence.context, request->fence.seqno,
request->global_seqno, request->global_seqno,
hwsp_seqno(request),
intel_engine_get_seqno(request->engine)); intel_engine_get_seqno(request->engine));
lockdep_assert_held(&request->i915->drm.struct_mutex); lockdep_assert_held(&request->i915->drm.struct_mutex);
@ -307,10 +310,11 @@ void i915_request_retire_upto(struct i915_request *rq)
struct intel_ring *ring = rq->ring; struct intel_ring *ring = rq->ring;
struct i915_request *tmp; struct i915_request *tmp;
GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n",
rq->engine->name, rq->engine->name,
rq->fence.context, rq->fence.seqno, rq->fence.context, rq->fence.seqno,
rq->global_seqno, rq->global_seqno,
hwsp_seqno(rq),
intel_engine_get_seqno(rq->engine)); intel_engine_get_seqno(rq->engine));
lockdep_assert_held(&rq->i915->drm.struct_mutex); lockdep_assert_held(&rq->i915->drm.struct_mutex);
@ -329,7 +333,7 @@ void i915_request_retire_upto(struct i915_request *rq)
static u32 timeline_get_seqno(struct i915_timeline *tl) static u32 timeline_get_seqno(struct i915_timeline *tl)
{ {
return ++tl->seqno; return tl->seqno += 1 + tl->has_initial_breadcrumb;
} }
static void move_to_timeline(struct i915_request *request, static void move_to_timeline(struct i915_request *request,
@ -355,10 +359,11 @@ void __i915_request_submit(struct i915_request *request)
struct intel_engine_cs *engine = request->engine; struct intel_engine_cs *engine = request->engine;
u32 seqno; u32 seqno;
GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d\n", GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d:%d\n",
engine->name, engine->name,
request->fence.context, request->fence.seqno, request->fence.context, request->fence.seqno,
engine->timeline.seqno + 1, engine->timeline.seqno + 1,
hwsp_seqno(request),
intel_engine_get_seqno(engine)); intel_engine_get_seqno(engine));
GEM_BUG_ON(!irqs_disabled()); GEM_BUG_ON(!irqs_disabled());
@ -372,20 +377,21 @@ void __i915_request_submit(struct i915_request *request)
/* We may be recursing from the signal callback of another i915 fence */ /* We may be recursing from the signal callback of another i915 fence */
spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
request->global_seqno = seqno; request->global_seqno = seqno;
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
intel_engine_enable_signaling(request, false); !i915_request_enable_breadcrumb(request))
intel_engine_queue_breadcrumbs(engine);
spin_unlock(&request->lock); spin_unlock(&request->lock);
engine->emit_breadcrumb(request, engine->emit_fini_breadcrumb(request,
request->ring->vaddr + request->postfix); request->ring->vaddr + request->postfix);
/* Transfer from per-context onto the global per-engine timeline */ /* Transfer from per-context onto the global per-engine timeline */
move_to_timeline(request, &engine->timeline); move_to_timeline(request, &engine->timeline);
trace_i915_request_execute(request); trace_i915_request_execute(request);
wake_up_all(&request->execute);
} }
void i915_request_submit(struct i915_request *request) void i915_request_submit(struct i915_request *request)
@ -405,10 +411,11 @@ void __i915_request_unsubmit(struct i915_request *request)
{ {
struct intel_engine_cs *engine = request->engine; struct intel_engine_cs *engine = request->engine;
GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d\n", GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d:%d\n",
engine->name, engine->name,
request->fence.context, request->fence.seqno, request->fence.context, request->fence.seqno,
request->global_seqno, request->global_seqno,
hwsp_seqno(request),
intel_engine_get_seqno(engine)); intel_engine_get_seqno(engine));
GEM_BUG_ON(!irqs_disabled()); GEM_BUG_ON(!irqs_disabled());
@ -427,7 +434,9 @@ void __i915_request_unsubmit(struct i915_request *request)
spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
request->global_seqno = 0; request->global_seqno = 0;
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
intel_engine_cancel_signaling(request); i915_request_cancel_breadcrumb(request);
GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
spin_unlock(&request->lock); spin_unlock(&request->lock);
/* Transfer back from the global per-engine timeline to per-context */ /* Transfer back from the global per-engine timeline to per-context */
@ -616,6 +625,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
rq->ring = ce->ring; rq->ring = ce->ring;
rq->timeline = ce->ring->timeline; rq->timeline = ce->ring->timeline;
GEM_BUG_ON(rq->timeline == &engine->timeline); GEM_BUG_ON(rq->timeline == &engine->timeline);
rq->hwsp_seqno = rq->timeline->hwsp_seqno;
spin_lock_init(&rq->lock); spin_lock_init(&rq->lock);
dma_fence_init(&rq->fence, dma_fence_init(&rq->fence,
@ -626,13 +636,11 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
/* We bump the ref for the fence chain */ /* We bump the ref for the fence chain */
i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify); i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
init_waitqueue_head(&rq->execute);
i915_sched_node_init(&rq->sched); i915_sched_node_init(&rq->sched);
/* No zalloc, must clear what we need by hand */ /* No zalloc, must clear what we need by hand */
rq->global_seqno = 0; rq->global_seqno = 0;
rq->signaling.wait.seqno = 0;
rq->file_priv = NULL; rq->file_priv = NULL;
rq->batch = NULL; rq->batch = NULL;
rq->capture_list = NULL; rq->capture_list = NULL;
@ -650,7 +658,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
* around inside i915_request_add() there is sufficient space at * around inside i915_request_add() there is sufficient space at
* the beginning of the ring as well. * the beginning of the ring as well.
*/ */
rq->reserved_space = 2 * engine->emit_breadcrumb_sz * sizeof(u32); rq->reserved_space = 2 * engine->emit_fini_breadcrumb_dw * sizeof(u32);
/* /*
* Record the position of the start of the request so that * Record the position of the start of the request so that
@ -901,7 +909,7 @@ void i915_request_add(struct i915_request *request)
* GPU processing the request, we never over-estimate the * GPU processing the request, we never over-estimate the
* position of the ring's HEAD. * position of the ring's HEAD.
*/ */
cs = intel_ring_begin(request, engine->emit_breadcrumb_sz); cs = intel_ring_begin(request, engine->emit_fini_breadcrumb_dw);
GEM_BUG_ON(IS_ERR(cs)); GEM_BUG_ON(IS_ERR(cs));
request->postfix = intel_ring_offset(request, cs); request->postfix = intel_ring_offset(request, cs);
@ -1023,13 +1031,10 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)
return this_cpu != cpu; return this_cpu != cpu;
} }
static bool __i915_spin_request(const struct i915_request *rq, static bool __i915_spin_request(const struct i915_request * const rq,
u32 seqno, int state, unsigned long timeout_us) int state, unsigned long timeout_us)
{ {
struct intel_engine_cs *engine = rq->engine; unsigned int cpu;
unsigned int irq, cpu;
GEM_BUG_ON(!seqno);
/* /*
* Only wait for the request if we know it is likely to complete. * Only wait for the request if we know it is likely to complete.
@ -1037,12 +1042,12 @@ static bool __i915_spin_request(const struct i915_request *rq,
* We don't track the timestamps around requests, nor the average * We don't track the timestamps around requests, nor the average
* request length, so we do not have a good indicator that this * request length, so we do not have a good indicator that this
* request will complete within the timeout. What we do know is the * request will complete within the timeout. What we do know is the
* order in which requests are executed by the engine and so we can * order in which requests are executed by the context and so we can
* tell if the request has started. If the request hasn't started yet, * tell if the request has been started. If the request is not even
* it is a fair assumption that it will not complete within our * running yet, it is a fair assumption that it will not complete
* relatively short timeout. * within our relatively short timeout.
*/ */
if (!intel_engine_has_started(engine, seqno)) if (!i915_request_is_running(rq))
return false; return false;
/* /*
@ -1056,20 +1061,10 @@ static bool __i915_spin_request(const struct i915_request *rq,
* takes to sleep on a request, on the order of a microsecond. * takes to sleep on a request, on the order of a microsecond.
*/ */
irq = READ_ONCE(engine->breadcrumbs.irq_count);
timeout_us += local_clock_us(&cpu); timeout_us += local_clock_us(&cpu);
do { do {
if (intel_engine_has_completed(engine, seqno)) if (i915_request_completed(rq))
return seqno == i915_request_global_seqno(rq); return true;
/*
* Seqno are meant to be ordered *before* the interrupt. If
* we see an interrupt without a corresponding seqno advance,
* assume we won't see one in the near future but require
* the engine->seqno_barrier() to fixup coherency.
*/
if (READ_ONCE(engine->breadcrumbs.irq_count) != irq)
break;
if (signal_pending_state(state, current)) if (signal_pending_state(state, current))
break; break;
@ -1083,16 +1078,16 @@ static bool __i915_spin_request(const struct i915_request *rq,
return false; return false;
} }
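
The comment above limits the optimistic spin to requests that are already running and bounds it by a small time budget plus a pending-signal check. A user-space sketch of that overall shape (illustrative only, not the kernel helper; signal handling is reduced to a comment):

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <time.h>

static uint64_t now_us(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000 + (uint64_t)ts.tv_nsec / 1000;
}

static bool spin_for_completion(atomic_bool *started, atomic_bool *completed,
				unsigned long timeout_us)
{
	uint64_t deadline;

	/* If the work has not even started, spinning cannot possibly help. */
	if (!atomic_load(started))
		return false;

	deadline = now_us() + timeout_us;
	do {
		if (atomic_load(completed))
			return true;
		/* a real implementation would also honour pending signals */
	} while (now_us() < deadline);

	return false;
}

int main(void)
{
	atomic_bool started = true, completed = true;

	/* Trivial single-threaded demo: already-completed work returns true. */
	return spin_for_completion(&started, &completed, 100) ? 0 : 1;
}
```
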
static bool __i915_wait_request_check_and_reset(struct i915_request *request) struct request_wait {
struct dma_fence_cb cb;
struct task_struct *tsk;
};
static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb)
{ {
struct i915_gpu_error *error = &request->i915->gpu_error; struct request_wait *wait = container_of(cb, typeof(*wait), cb);
if (likely(!i915_reset_handoff(error))) wake_up_process(wait->tsk);
return false;
__set_current_state(TASK_RUNNING);
i915_reset(request->i915, error->stalled_mask, error->reason);
return true;
} }
/** /**
@ -1120,17 +1115,9 @@ long i915_request_wait(struct i915_request *rq,
{ {
const int state = flags & I915_WAIT_INTERRUPTIBLE ? const int state = flags & I915_WAIT_INTERRUPTIBLE ?
TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
wait_queue_head_t *errq = &rq->i915->gpu_error.wait_queue; struct request_wait wait;
DEFINE_WAIT_FUNC(reset, default_wake_function);
DEFINE_WAIT_FUNC(exec, default_wake_function);
struct intel_wait wait;
might_sleep(); might_sleep();
#if IS_ENABLED(CONFIG_LOCKDEP)
GEM_BUG_ON(debug_locks &&
!!lockdep_is_held(&rq->i915->drm.struct_mutex) !=
!!(flags & I915_WAIT_LOCKED));
#endif
GEM_BUG_ON(timeout < 0); GEM_BUG_ON(timeout < 0);
if (i915_request_completed(rq)) if (i915_request_completed(rq))
@ -1141,112 +1128,41 @@ long i915_request_wait(struct i915_request *rq,
trace_i915_request_wait_begin(rq, flags); trace_i915_request_wait_begin(rq, flags);
add_wait_queue(&rq->execute, &exec); /* Optimistic short spin before touching IRQs */
if (flags & I915_WAIT_LOCKED) if (__i915_spin_request(rq, state, 5))
add_wait_queue(errq, &reset); goto out;
intel_wait_init(&wait);
if (flags & I915_WAIT_PRIORITY) if (flags & I915_WAIT_PRIORITY)
i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT); i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
restart: wait.tsk = current;
do { if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake))
set_current_state(state); goto out;
if (intel_wait_update_request(&wait, rq))
break;
if (flags & I915_WAIT_LOCKED &&
__i915_wait_request_check_and_reset(rq))
continue;
if (signal_pending_state(state, current)) {
timeout = -ERESTARTSYS;
goto complete;
}
if (!timeout) {
timeout = -ETIME;
goto complete;
}
timeout = io_schedule_timeout(timeout);
} while (1);
GEM_BUG_ON(!intel_wait_has_seqno(&wait));
GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
/* Optimistic short spin before touching IRQs */
if (__i915_spin_request(rq, wait.seqno, state, 5))
goto complete;
set_current_state(state);
if (intel_engine_add_wait(rq->engine, &wait))
/*
* In order to check that we haven't missed the interrupt
* as we enabled it, we need to kick ourselves to do a
* coherent check on the seqno before we sleep.
*/
goto wakeup;
if (flags & I915_WAIT_LOCKED)
__i915_wait_request_check_and_reset(rq);
for (;;) { for (;;) {
if (signal_pending_state(state, current)) {
timeout = -ERESTARTSYS;
break;
}
if (!timeout) {
timeout = -ETIME;
break;
}
timeout = io_schedule_timeout(timeout);
if (intel_wait_complete(&wait) &&
intel_wait_check_request(&wait, rq))
break;
set_current_state(state); set_current_state(state);
wakeup:
if (i915_request_completed(rq)) if (i915_request_completed(rq))
break; break;
/* if (signal_pending_state(state, current)) {
* If the GPU is hung, and we hold the lock, reset the GPU timeout = -ERESTARTSYS;
* and then check for completion. On a full reset, the engine's
* HW seqno will be advanced passed us and we are complete.
* If we do a partial reset, we have to wait for the GPU to
* resume and update the breadcrumb.
*
* If we don't hold the mutex, we can just wait for the worker
* to come along and update the breadcrumb (either directly
* itself, or indirectly by recovering the GPU).
*/
if (flags & I915_WAIT_LOCKED &&
__i915_wait_request_check_and_reset(rq))
continue;
/* Only spin if we know the GPU is processing this request */
if (__i915_spin_request(rq, wait.seqno, state, 2))
break; break;
if (!intel_wait_check_request(&wait, rq)) {
intel_engine_remove_wait(rq->engine, &wait);
goto restart;
}
} }
intel_engine_remove_wait(rq->engine, &wait); if (!timeout) {
complete: timeout = -ETIME;
break;
}
timeout = io_schedule_timeout(timeout);
}
__set_current_state(TASK_RUNNING); __set_current_state(TASK_RUNNING);
if (flags & I915_WAIT_LOCKED)
remove_wait_queue(errq, &reset);
remove_wait_queue(&rq->execute, &exec);
trace_i915_request_wait_end(rq);
dma_fence_remove_callback(&rq->fence, &wait.cb);
out:
trace_i915_request_wait_end(rq);
return timeout; return timeout;
} }
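
The rewritten i915_request_wait() above reduces to a generic pattern: spin briefly in case the fence is about to signal, then install a wake-up callback and sleep until it fires or the timeout expires. Below is a minimal userspace sketch of that shape using pthreads; toy_fence, toy_fence_signal and toy_fence_wait are invented names, the deadline is an absolute CLOCK_REALTIME time as pthread_cond_timedwait() expects, and this is only an illustration of the wait loop, not the driver's implementation.

#include <errno.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <time.h>

struct toy_fence {
	atomic_bool signaled;
	pthread_mutex_t lock;	/* lock and cond must be initialised by the caller */
	pthread_cond_t cond;
};

/* Producer side: mark the fence complete and wake any sleeper. */
static void toy_fence_signal(struct toy_fence *f)
{
	atomic_store(&f->signaled, true);
	pthread_mutex_lock(&f->lock);
	pthread_cond_broadcast(&f->cond);
	pthread_mutex_unlock(&f->lock);
}

/* Consumer side: short busy-spin first, then block with a deadline. */
static int toy_fence_wait(struct toy_fence *f, unsigned int spin_us,
			  const struct timespec *deadline)
{
	struct timespec start, now;
	long long elapsed_us = 0;

	/* Optimistic short spin before touching the mutex/condvar. */
	clock_gettime(CLOCK_MONOTONIC, &start);
	while (elapsed_us < (long long)spin_us) {
		if (atomic_load(&f->signaled))
			return 0;
		clock_gettime(CLOCK_MONOTONIC, &now);
		elapsed_us = (now.tv_sec - start.tv_sec) * 1000000LL +
			     (now.tv_nsec - start.tv_nsec) / 1000;
	}

	/* Slow path: sleep until signalled or the absolute deadline passes. */
	pthread_mutex_lock(&f->lock);
	while (!atomic_load(&f->signaled)) {
		int err = pthread_cond_timedwait(&f->cond, &f->lock, deadline);

		if (err == ETIMEDOUT) {
			pthread_mutex_unlock(&f->lock);
			return -ETIMEDOUT;	/* analogous to -ETIME above */
		}
	}
	pthread_mutex_unlock(&f->lock);
	return 0;
}

The short spin bounds the common case where the request retires within a few microseconds, so most waiters never pay for a context switch.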


@ -38,23 +38,34 @@ struct drm_i915_gem_object;
struct i915_request; struct i915_request;
struct i915_timeline; struct i915_timeline;
struct intel_wait {
struct rb_node node;
struct task_struct *tsk;
struct i915_request *request;
u32 seqno;
};
struct intel_signal_node {
struct intel_wait wait;
struct list_head link;
};
struct i915_capture_list { struct i915_capture_list {
struct i915_capture_list *next; struct i915_capture_list *next;
struct i915_vma *vma; struct i915_vma *vma;
}; };
enum {
/*
* I915_FENCE_FLAG_ACTIVE - this request is currently submitted to HW.
*
* Set by __i915_request_submit() on handing over to HW, and cleared
* by __i915_request_unsubmit() if we preempt this request.
*
* Finally cleared for consistency on retiring the request, when
* we know the HW is no longer running this request.
*
* See i915_request_is_active()
*/
I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS,
/*
* I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list
*
* Internal bookkeeping used by the breadcrumb code to track when
* a request is on the various signal_list.
*/
I915_FENCE_FLAG_SIGNAL,
};
/** /**
* Request queue structure. * Request queue structure.
* *
@ -97,7 +108,7 @@ struct i915_request {
struct intel_context *hw_context; struct intel_context *hw_context;
struct intel_ring *ring; struct intel_ring *ring;
struct i915_timeline *timeline; struct i915_timeline *timeline;
struct intel_signal_node signaling; struct list_head signal_link;
/* /*
* The rcu epoch of when this request was allocated. Used to judiciously * The rcu epoch of when this request was allocated. Used to judiciously
@ -116,7 +127,6 @@ struct i915_request {
*/ */
struct i915_sw_fence submit; struct i915_sw_fence submit;
wait_queue_entry_t submitq; wait_queue_entry_t submitq;
wait_queue_head_t execute;
/* /*
* A list of everyone we wait upon, and everyone who waits upon us. * A list of everyone we wait upon, and everyone who waits upon us.
@ -130,6 +140,13 @@ struct i915_request {
struct i915_sched_node sched; struct i915_sched_node sched;
struct i915_dependency dep; struct i915_dependency dep;
/*
* A convenience pointer to the current breadcrumb value stored in
* the HW status page (or our timeline's local equivalent). The full
* path would be rq->hw_context->ring->timeline->hwsp_seqno.
*/
const u32 *hwsp_seqno;
/** /**
* GEM sequence number associated with this request on the * GEM sequence number associated with this request on the
* global execution timeline. It is zero when the request is not * global execution timeline. It is zero when the request is not
@ -248,7 +265,7 @@ i915_request_put(struct i915_request *rq)
* that it has passed the global seqno and the global seqno is unchanged * that it has passed the global seqno and the global seqno is unchanged
* after the read, it is indeed complete). * after the read, it is indeed complete).
*/ */
static u32 static inline u32
i915_request_global_seqno(const struct i915_request *request) i915_request_global_seqno(const struct i915_request *request)
{ {
return READ_ONCE(request->global_seqno); return READ_ONCE(request->global_seqno);
@ -270,6 +287,10 @@ void i915_request_skip(struct i915_request *request, int error);
void __i915_request_unsubmit(struct i915_request *request); void __i915_request_unsubmit(struct i915_request *request);
void i915_request_unsubmit(struct i915_request *request); void i915_request_unsubmit(struct i915_request *request);
/* Note: part of the intel_breadcrumbs family */
bool i915_request_enable_breadcrumb(struct i915_request *request);
void i915_request_cancel_breadcrumb(struct i915_request *request);
long i915_request_wait(struct i915_request *rq, long i915_request_wait(struct i915_request *rq,
unsigned int flags, unsigned int flags,
long timeout) long timeout)
@ -282,13 +303,14 @@ long i915_request_wait(struct i915_request *rq,
static inline bool i915_request_signaled(const struct i915_request *rq) static inline bool i915_request_signaled(const struct i915_request *rq)
{ {
/* The request may live longer than its HWSP, so check flags first! */
return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags); return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags);
} }
static inline bool intel_engine_has_started(struct intel_engine_cs *engine, static inline bool i915_request_is_active(const struct i915_request *rq)
u32 seqno); {
static inline bool intel_engine_has_completed(struct intel_engine_cs *engine, return test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
u32 seqno); }
/** /**
* Returns true if seq1 is later than seq2. * Returns true if seq1 is later than seq2.
@ -298,6 +320,40 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
return (s32)(seq1 - seq2) >= 0; return (s32)(seq1 - seq2) >= 0;
} }
static inline u32 __hwsp_seqno(const struct i915_request *rq)
{
return READ_ONCE(*rq->hwsp_seqno);
}
/**
* hwsp_seqno - the current breadcrumb value in the HW status page
* @rq: the request, to chase the relevant HW status page
*
* The emphasis in naming here is that hwsp_seqno() is not a property of the
* request, but an indication of the current HW state (associated with this
* request). Its value will change as the GPU executes more requests.
*
* Returns the current breadcrumb value in the associated HW status page (or
* the local timeline's equivalent) for this request. The request itself
* has the associated breadcrumb value of rq->fence.seqno; when the HW
* status page has that breadcrumb or later, this request is complete.
*/
static inline u32 hwsp_seqno(const struct i915_request *rq)
{
u32 seqno;
rcu_read_lock(); /* the HWSP may be freed at runtime */
seqno = __hwsp_seqno(rq);
rcu_read_unlock();
return seqno;
}
static inline bool __i915_request_has_started(const struct i915_request *rq)
{
return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1);
}
/** /**
* i915_request_started - check if the request has begun being executed * i915_request_started - check if the request has begun being executed
* @rq: the request * @rq: the request
@ -309,32 +365,40 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
*/ */
static inline bool i915_request_started(const struct i915_request *rq) static inline bool i915_request_started(const struct i915_request *rq)
{ {
u32 seqno; if (i915_request_signaled(rq))
return true;
seqno = i915_request_global_seqno(rq); /* Remember: started but may have since been preempted! */
if (!seqno) /* not yet submitted to HW */ return __i915_request_has_started(rq);
return false;
return intel_engine_has_started(rq->engine, seqno);
} }
static inline bool /**
__i915_request_completed(const struct i915_request *rq, u32 seqno) * i915_request_is_running - check if the request may actually be executing
* @rq: the request
*
* Returns true if the request is currently submitted to hardware, has passed
* its start point (i.e. the context is set up and not busywaiting). Note that
* it may no longer be running by the time the function returns!
*/
static inline bool i915_request_is_running(const struct i915_request *rq)
{ {
GEM_BUG_ON(!seqno); if (!i915_request_is_active(rq))
return intel_engine_has_completed(rq->engine, seqno) && return false;
seqno == i915_request_global_seqno(rq);
return __i915_request_has_started(rq);
} }
static inline bool i915_request_completed(const struct i915_request *rq) static inline bool i915_request_completed(const struct i915_request *rq)
{ {
u32 seqno; if (i915_request_signaled(rq))
return true;
seqno = i915_request_global_seqno(rq); return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno);
if (!seqno) }
return false;
return __i915_request_completed(rq, seqno); static inline void i915_request_mark_complete(struct i915_request *rq)
{
rq->hwsp_seqno = (u32 *)&rq->fence.seqno; /* decouple from HWSP */
} }
void i915_retire_requests(struct drm_i915_private *i915); void i915_retire_requests(struct drm_i915_private *i915);


@ -5,6 +5,7 @@
*/ */
#include <linux/sched/mm.h> #include <linux/sched/mm.h>
#include <linux/stop_machine.h>
#include "i915_drv.h" #include "i915_drv.h"
#include "i915_gpu_error.h" #include "i915_gpu_error.h"
@ -12,27 +13,33 @@
#include "intel_guc.h" #include "intel_guc.h"
#define RESET_MAX_RETRIES 3
/* XXX How to handle concurrent GGTT updates using tiling registers? */
#define RESET_UNDER_STOP_MACHINE 0
static void engine_skip_context(struct i915_request *rq) static void engine_skip_context(struct i915_request *rq)
{ {
struct intel_engine_cs *engine = rq->engine; struct intel_engine_cs *engine = rq->engine;
struct i915_gem_context *hung_ctx = rq->gem_context; struct i915_gem_context *hung_ctx = rq->gem_context;
struct i915_timeline *timeline = rq->timeline; struct i915_timeline *timeline = rq->timeline;
unsigned long flags;
lockdep_assert_held(&engine->timeline.lock);
GEM_BUG_ON(timeline == &engine->timeline); GEM_BUG_ON(timeline == &engine->timeline);
spin_lock_irqsave(&engine->timeline.lock, flags);
spin_lock(&timeline->lock); spin_lock(&timeline->lock);
list_for_each_entry_continue(rq, &engine->timeline.requests, link) if (i915_request_is_active(rq)) {
list_for_each_entry_continue(rq,
&engine->timeline.requests, link)
if (rq->gem_context == hung_ctx) if (rq->gem_context == hung_ctx)
i915_request_skip(rq, -EIO); i915_request_skip(rq, -EIO);
}
list_for_each_entry(rq, &timeline->requests, link) list_for_each_entry(rq, &timeline->requests, link)
i915_request_skip(rq, -EIO); i915_request_skip(rq, -EIO);
spin_unlock(&timeline->lock); spin_unlock(&timeline->lock);
spin_unlock_irqrestore(&engine->timeline.lock, flags);
} }
static void client_mark_guilty(struct drm_i915_file_private *file_priv, static void client_mark_guilty(struct drm_i915_file_private *file_priv,
@ -59,7 +66,7 @@ static void client_mark_guilty(struct drm_i915_file_private *file_priv,
} }
} }
static void context_mark_guilty(struct i915_gem_context *ctx) static bool context_mark_guilty(struct i915_gem_context *ctx)
{ {
unsigned int score; unsigned int score;
bool banned, bannable; bool banned, bannable;
@ -72,7 +79,7 @@ static void context_mark_guilty(struct i915_gem_context *ctx)
/* Cool contexts don't accumulate client ban score */ /* Cool contexts don't accumulate client ban score */
if (!bannable) if (!bannable)
return; return false;
if (banned) { if (banned) {
DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n", DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n",
@ -83,6 +90,8 @@ static void context_mark_guilty(struct i915_gem_context *ctx)
if (!IS_ERR_OR_NULL(ctx->file_priv)) if (!IS_ERR_OR_NULL(ctx->file_priv))
client_mark_guilty(ctx->file_priv, ctx); client_mark_guilty(ctx->file_priv, ctx);
return banned;
} }
static void context_mark_innocent(struct i915_gem_context *ctx) static void context_mark_innocent(struct i915_gem_context *ctx)
@ -90,6 +99,21 @@ static void context_mark_innocent(struct i915_gem_context *ctx)
atomic_inc(&ctx->active_count); atomic_inc(&ctx->active_count);
} }
void i915_reset_request(struct i915_request *rq, bool guilty)
{
lockdep_assert_held(&rq->engine->timeline.lock);
GEM_BUG_ON(i915_request_completed(rq));
if (guilty) {
i915_request_skip(rq, -EIO);
if (context_mark_guilty(rq->gem_context))
engine_skip_context(rq);
} else {
dma_fence_set_error(&rq->fence, -EAGAIN);
context_mark_innocent(rq->gem_context);
}
}
static void gen3_stop_engine(struct intel_engine_cs *engine) static void gen3_stop_engine(struct intel_engine_cs *engine)
{ {
struct drm_i915_private *dev_priv = engine->i915; struct drm_i915_private *dev_priv = engine->i915;
@ -144,14 +168,14 @@ static int i915_do_reset(struct drm_i915_private *i915,
/* Assert reset for at least 20 usec, and wait for acknowledgement. */ /* Assert reset for at least 20 usec, and wait for acknowledgement. */
pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
usleep_range(50, 200); udelay(50);
err = wait_for(i915_in_reset(pdev), 500); err = wait_for_atomic(i915_in_reset(pdev), 50);
/* Clear the reset request. */ /* Clear the reset request. */
pci_write_config_byte(pdev, I915_GDRST, 0); pci_write_config_byte(pdev, I915_GDRST, 0);
usleep_range(50, 200); udelay(50);
if (!err) if (!err)
err = wait_for(!i915_in_reset(pdev), 500); err = wait_for_atomic(!i915_in_reset(pdev), 50);
return err; return err;
} }
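
The hunk above swaps sleeping waits (usleep_range()/wait_for()) for udelay() and wait_for_atomic() so the reset path can run from atomic context. Stripped of the driver specifics, that is a bounded busy-poll; the following is a hedged, self-contained sketch of such a helper, where poll_until() and monotonic_us() are invented names for illustration.

#include <stdbool.h>
#include <stdint.h>
#include <time.h>

static uint64_t monotonic_us(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000u + ts.tv_nsec / 1000;
}

/*
 * Busy-poll cond() until it returns true or timeout_us expires.
 * Returns 0 on success, -1 on timeout.  Never sleeps, so it is safe
 * in contexts where blocking is not allowed.
 */
static int poll_until(bool (*cond)(void *), void *arg, uint64_t timeout_us)
{
	const uint64_t end = monotonic_us() + timeout_us;

	do {
		if (cond(arg))
			return 0;
	} while (monotonic_us() < end);

	return cond(arg) ? 0 : -1;	/* one last check after the deadline */
}

Because it never sleeps, the timeout has to stay short, which is consistent with the waits above dropping from 500 to 50.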
@ -171,7 +195,7 @@ static int g33_do_reset(struct drm_i915_private *i915,
struct pci_dev *pdev = i915->drm.pdev; struct pci_dev *pdev = i915->drm.pdev;
pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
return wait_for(g4x_reset_complete(pdev), 500); return wait_for_atomic(g4x_reset_complete(pdev), 50);
} }
static int g4x_do_reset(struct drm_i915_private *dev_priv, static int g4x_do_reset(struct drm_i915_private *dev_priv,
@ -182,13 +206,13 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv,
int ret; int ret;
/* WaVcpClkGateDisableForMediaReset:ctg,elk */ /* WaVcpClkGateDisableForMediaReset:ctg,elk */
I915_WRITE(VDECCLK_GATE_D, I915_WRITE_FW(VDECCLK_GATE_D,
I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE);
POSTING_READ(VDECCLK_GATE_D); POSTING_READ_FW(VDECCLK_GATE_D);
pci_write_config_byte(pdev, I915_GDRST, pci_write_config_byte(pdev, I915_GDRST,
GRDOM_MEDIA | GRDOM_RESET_ENABLE); GRDOM_MEDIA | GRDOM_RESET_ENABLE);
ret = wait_for(g4x_reset_complete(pdev), 500); ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
if (ret) { if (ret) {
DRM_DEBUG_DRIVER("Wait for media reset failed\n"); DRM_DEBUG_DRIVER("Wait for media reset failed\n");
goto out; goto out;
@ -196,7 +220,7 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv,
pci_write_config_byte(pdev, I915_GDRST, pci_write_config_byte(pdev, I915_GDRST,
GRDOM_RENDER | GRDOM_RESET_ENABLE); GRDOM_RENDER | GRDOM_RESET_ENABLE);
ret = wait_for(g4x_reset_complete(pdev), 500); ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
if (ret) { if (ret) {
DRM_DEBUG_DRIVER("Wait for render reset failed\n"); DRM_DEBUG_DRIVER("Wait for render reset failed\n");
goto out; goto out;
@ -205,9 +229,9 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv,
out: out:
pci_write_config_byte(pdev, I915_GDRST, 0); pci_write_config_byte(pdev, I915_GDRST, 0);
I915_WRITE(VDECCLK_GATE_D, I915_WRITE_FW(VDECCLK_GATE_D,
I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE); I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE);
POSTING_READ(VDECCLK_GATE_D); POSTING_READ_FW(VDECCLK_GATE_D);
return ret; return ret;
} }
@ -218,27 +242,29 @@ static int ironlake_do_reset(struct drm_i915_private *dev_priv,
{ {
int ret; int ret;
I915_WRITE(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE);
ret = intel_wait_for_register(dev_priv, ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR,
ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, ILK_GRDOM_RESET_ENABLE, 0,
500); 5000, 0,
NULL);
if (ret) { if (ret) {
DRM_DEBUG_DRIVER("Wait for render reset failed\n"); DRM_DEBUG_DRIVER("Wait for render reset failed\n");
goto out; goto out;
} }
I915_WRITE(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE);
ret = intel_wait_for_register(dev_priv, ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR,
ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, ILK_GRDOM_RESET_ENABLE, 0,
500); 5000, 0,
NULL);
if (ret) { if (ret) {
DRM_DEBUG_DRIVER("Wait for media reset failed\n"); DRM_DEBUG_DRIVER("Wait for media reset failed\n");
goto out; goto out;
} }
out: out:
I915_WRITE(ILK_GDSR, 0); I915_WRITE_FW(ILK_GDSR, 0);
POSTING_READ(ILK_GDSR); POSTING_READ_FW(ILK_GDSR);
return ret; return ret;
} }
@ -527,32 +553,21 @@ static reset_func intel_get_gpu_reset(struct drm_i915_private *i915)
int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask) int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask)
{ {
reset_func reset = intel_get_gpu_reset(i915); const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1;
reset_func reset;
int ret = -ETIMEDOUT;
int retry; int retry;
int ret;
/* reset = intel_get_gpu_reset(i915);
* We want to perform per-engine reset from atomic context (e.g. if (!reset)
* softirq), which imposes the constraint that we cannot sleep. return -ENODEV;
* However, experience suggests that spending a bit of time waiting
* for a reset helps in various cases, so for a full-device reset
* we apply the opposite rule and wait if we want to. As we should
* always follow up a failed per-engine reset with a full device reset,
* being a little faster, stricter and more error prone for the
* atomic case seems an acceptable compromise.
*
* Unfortunately this leads to a bimodal routine, when the goal was
* to have a single reset function that worked for resetting any
* number of engines simultaneously.
*/
might_sleep_if(engine_mask == ALL_ENGINES);
/* /*
* If the power well sleeps during the reset, the reset * If the power well sleeps during the reset, the reset
* request may be dropped and never completes (causing -EIO). * request may be dropped and never completes (causing -EIO).
*/ */
intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
for (retry = 0; retry < 3; retry++) { for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) {
/* /*
* We stop engines, otherwise we might get failed reset and a * We stop engines, otherwise we might get failed reset and a
* dead gpu (on elk). Also as modern gpu as kbl can suffer * dead gpu (on elk). Also as modern gpu as kbl can suffer
@ -569,15 +584,10 @@ int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask)
*/ */
i915_stop_engines(i915, engine_mask); i915_stop_engines(i915, engine_mask);
ret = -ENODEV;
if (reset) {
GEM_TRACE("engine_mask=%x\n", engine_mask); GEM_TRACE("engine_mask=%x\n", engine_mask);
preempt_disable();
ret = reset(i915, engine_mask, retry); ret = reset(i915, engine_mask, retry);
} preempt_enable();
if (ret != -ETIMEDOUT || engine_mask != ALL_ENGINES)
break;
cond_resched();
} }
intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
@ -586,6 +596,9 @@ int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask)
bool intel_has_gpu_reset(struct drm_i915_private *i915) bool intel_has_gpu_reset(struct drm_i915_private *i915)
{ {
if (USES_GUC(i915))
return false;
return intel_get_gpu_reset(i915); return intel_get_gpu_reset(i915);
} }
@ -613,11 +626,8 @@ int intel_reset_guc(struct drm_i915_private *i915)
* Ensure irq handler finishes, and not run again. * Ensure irq handler finishes, and not run again.
* Also return the active request so that we only search for it once. * Also return the active request so that we only search for it once.
*/ */
static struct i915_request * static void reset_prepare_engine(struct intel_engine_cs *engine)
reset_prepare_engine(struct intel_engine_cs *engine)
{ {
struct i915_request *rq;
/* /*
* During the reset sequence, we must prevent the engine from * During the reset sequence, we must prevent the engine from
* entering RC6. As the context state is undefined until we restart * entering RC6. As the context state is undefined until we restart
@ -626,190 +636,135 @@ reset_prepare_engine(struct intel_engine_cs *engine)
* GPU state upon resume, i.e. fail to restart after a reset. * GPU state upon resume, i.e. fail to restart after a reset.
*/ */
intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL);
engine->reset.prepare(engine);
rq = engine->reset.prepare(engine);
if (rq && rq->fence.error == -EIO)
rq = ERR_PTR(-EIO); /* Previous reset failed! */
return rq;
} }
static int reset_prepare(struct drm_i915_private *i915) static void reset_prepare(struct drm_i915_private *i915)
{ {
struct intel_engine_cs *engine; struct intel_engine_cs *engine;
struct i915_request *rq;
enum intel_engine_id id; enum intel_engine_id id;
int err = 0;
for_each_engine(engine, i915, id) { for_each_engine(engine, i915, id)
rq = reset_prepare_engine(engine); reset_prepare_engine(engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
continue;
}
engine->hangcheck.active_request = rq;
}
i915_gem_revoke_fences(i915);
intel_uc_sanitize(i915); intel_uc_sanitize(i915);
return err;
} }
/* Returns the request if it was guilty of the hang */ static int gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
static struct i915_request *
reset_request(struct intel_engine_cs *engine,
struct i915_request *rq,
bool stalled)
{
/*
* The guilty request will get skipped on a hung engine.
*
* Users of client default contexts do not rely on logical
* state preserved between batches so it is safe to execute
* queued requests following the hang. Non default contexts
* rely on preserved state, so skipping a batch loses the
* evolution of the state and it needs to be considered corrupted.
* Executing more queued batches on top of corrupted state is
* risky. But we take the risk by trying to advance through
* the queued requests in order to make the client behaviour
* more predictable around resets, by not throwing away random
* amount of batches it has prepared for execution. Sophisticated
* clients can use gem_reset_stats_ioctl and dma fence status
* (exported via sync_file info ioctl on explicit fences) to observe
* when it loses the context state and should rebuild accordingly.
*
* The context ban, and ultimately the client ban, mechanism are safety
* valves if client submission ends up resulting in nothing more than
* subsequent hangs.
*/
if (i915_request_completed(rq)) {
GEM_TRACE("%s pardoned global=%d (fence %llx:%lld), current %d\n",
engine->name, rq->global_seqno,
rq->fence.context, rq->fence.seqno,
intel_engine_get_seqno(engine));
stalled = false;
}
if (stalled) {
context_mark_guilty(rq->gem_context);
i915_request_skip(rq, -EIO);
/* If this context is now banned, skip all pending requests. */
if (i915_gem_context_is_banned(rq->gem_context))
engine_skip_context(rq);
} else {
/*
* Since this is not the hung engine, it may have advanced
* since the hang declaration. Double check by refinding
* the active request at the time of the reset.
*/
rq = i915_gem_find_active_request(engine);
if (rq) {
unsigned long flags;
context_mark_innocent(rq->gem_context);
dma_fence_set_error(&rq->fence, -EAGAIN);
/* Rewind the engine to replay the incomplete rq */
spin_lock_irqsave(&engine->timeline.lock, flags);
rq = list_prev_entry(rq, link);
if (&rq->link == &engine->timeline.requests)
rq = NULL;
spin_unlock_irqrestore(&engine->timeline.lock, flags);
}
}
return rq;
}
static void reset_engine(struct intel_engine_cs *engine,
struct i915_request *rq,
bool stalled)
{
if (rq)
rq = reset_request(engine, rq, stalled);
/* Setup the CS to resume from the breadcrumb of the hung request */
engine->reset.reset(engine, rq);
}
static void gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
{ {
struct intel_engine_cs *engine; struct intel_engine_cs *engine;
enum intel_engine_id id; enum intel_engine_id id;
int err;
lockdep_assert_held(&i915->drm.struct_mutex);
i915_retire_requests(i915);
for_each_engine(engine, i915, id) {
struct intel_context *ce;
reset_engine(engine,
engine->hangcheck.active_request,
stalled_mask & ENGINE_MASK(id));
ce = fetch_and_zero(&engine->last_retired_context);
if (ce)
intel_context_unpin(ce);
/* /*
* Ostensibly, we always want a context loaded for powersaving, * there.
* so if the engine is idle after the reset, send a request * there.
* to load our scratch kernel_context.
*
* More mysteriously, if we leave the engine idle after a reset,
* the next userspace batch may hang, with what appears to be
* an incoherent read by the CS (presumably stale TLB). An
* empty request appears sufficient to paper over the glitch.
*/ */
if (intel_engine_is_idle(engine)) { err = i915_ggtt_enable_hw(i915);
struct i915_request *rq; if (err)
return err;
rq = i915_request_alloc(engine, i915->kernel_context); for_each_engine(engine, i915, id)
if (!IS_ERR(rq)) intel_engine_reset(engine, stalled_mask & ENGINE_MASK(id));
i915_request_add(rq);
}
}
i915_gem_restore_fences(i915); i915_gem_restore_fences(i915);
return err;
} }
static void reset_finish_engine(struct intel_engine_cs *engine) static void reset_finish_engine(struct intel_engine_cs *engine)
{ {
engine->reset.finish(engine); engine->reset.finish(engine);
intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
} }
struct i915_gpu_restart {
struct work_struct work;
struct drm_i915_private *i915;
};
static void restart_work(struct work_struct *work)
{
struct i915_gpu_restart *arg = container_of(work, typeof(*arg), work);
struct drm_i915_private *i915 = arg->i915;
struct intel_engine_cs *engine;
enum intel_engine_id id;
intel_wakeref_t wakeref;
wakeref = intel_runtime_pm_get(i915);
mutex_lock(&i915->drm.struct_mutex);
WRITE_ONCE(i915->gpu_error.restart, NULL);
for_each_engine(engine, i915, id) {
struct i915_request *rq;
/*
* Ostensibly, we always want a context loaded for powersaving,
* so if the engine is idle after the reset, send a request
* to load our scratch kernel_context.
*/
if (!intel_engine_is_idle(engine))
continue;
rq = i915_request_alloc(engine, i915->kernel_context);
if (!IS_ERR(rq))
i915_request_add(rq);
}
mutex_unlock(&i915->drm.struct_mutex);
intel_runtime_pm_put(i915, wakeref);
kfree(arg);
}
static void reset_finish(struct drm_i915_private *i915) static void reset_finish(struct drm_i915_private *i915)
{ {
struct intel_engine_cs *engine; struct intel_engine_cs *engine;
enum intel_engine_id id; enum intel_engine_id id;
lockdep_assert_held(&i915->drm.struct_mutex); for_each_engine(engine, i915, id)
for_each_engine(engine, i915, id) {
engine->hangcheck.active_request = NULL;
reset_finish_engine(engine); reset_finish_engine(engine);
} }
static void reset_restart(struct drm_i915_private *i915)
{
struct i915_gpu_restart *arg;
/*
* Following the reset, ensure that we always reload context for
* powersaving, and to correct engine->last_retired_context. Since
* this requires us to submit a request, queue a worker to do that
* task for us to evade any locking here.
*/
if (READ_ONCE(i915->gpu_error.restart))
return;
arg = kmalloc(sizeof(*arg), GFP_KERNEL);
if (arg) {
arg->i915 = i915;
INIT_WORK(&arg->work, restart_work);
WRITE_ONCE(i915->gpu_error.restart, arg);
queue_work(i915->wq, &arg->work);
}
} }
static void nop_submit_request(struct i915_request *request) static void nop_submit_request(struct i915_request *request)
{ {
struct intel_engine_cs *engine = request->engine;
unsigned long flags; unsigned long flags;
GEM_TRACE("%s fence %llx:%lld -> -EIO\n", GEM_TRACE("%s fence %llx:%lld -> -EIO\n",
request->engine->name, engine->name, request->fence.context, request->fence.seqno);
request->fence.context, request->fence.seqno);
dma_fence_set_error(&request->fence, -EIO); dma_fence_set_error(&request->fence, -EIO);
spin_lock_irqsave(&request->engine->timeline.lock, flags); spin_lock_irqsave(&engine->timeline.lock, flags);
__i915_request_submit(request); __i915_request_submit(request);
intel_engine_write_global_seqno(request->engine, request->global_seqno); i915_request_mark_complete(request);
spin_unlock_irqrestore(&request->engine->timeline.lock, flags); intel_engine_write_global_seqno(engine, request->global_seqno);
spin_unlock_irqrestore(&engine->timeline.lock, flags);
intel_engine_queue_breadcrumbs(engine);
} }
void i915_gem_set_wedged(struct drm_i915_private *i915) void i915_gem_set_wedged(struct drm_i915_private *i915)
@ -864,7 +819,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
for_each_engine(engine, i915, id) { for_each_engine(engine, i915, id) {
reset_finish_engine(engine); reset_finish_engine(engine);
intel_engine_wakeup(engine); intel_engine_signal_breadcrumbs(engine);
} }
smp_mb__before_atomic(); smp_mb__before_atomic();
@ -882,8 +837,6 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
struct i915_timeline *tl; struct i915_timeline *tl;
bool ret = false; bool ret = false;
lockdep_assert_held(&i915->drm.struct_mutex);
if (!test_bit(I915_WEDGED, &error->flags)) if (!test_bit(I915_WEDGED, &error->flags))
return true; return true;
@ -904,11 +857,12 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
* *
* No more can be submitted until we reset the wedged bit. * No more can be submitted until we reset the wedged bit.
*/ */
list_for_each_entry(tl, &i915->gt.timelines, link) { mutex_lock(&i915->gt.timelines.mutex);
list_for_each_entry(tl, &i915->gt.timelines.active_list, link) {
struct i915_request *rq; struct i915_request *rq;
long timeout;
rq = i915_gem_active_peek(&tl->last_request, rq = i915_gem_active_get_unlocked(&tl->last_request);
&i915->drm.struct_mutex);
if (!rq) if (!rq)
continue; continue;
@ -923,12 +877,15 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
* and when the seqno passes the fence, the signaler * and when the seqno passes the fence, the signaler
* then signals the fence waking us up). * then signals the fence waking us up).
*/ */
if (dma_fence_default_wait(&rq->fence, true, timeout = dma_fence_default_wait(&rq->fence, true,
MAX_SCHEDULE_TIMEOUT) < 0) MAX_SCHEDULE_TIMEOUT);
i915_request_put(rq);
if (timeout < 0) {
mutex_unlock(&i915->gt.timelines.mutex);
goto unlock; goto unlock;
} }
i915_retire_requests(i915); }
GEM_BUG_ON(i915->gt.active_requests); mutex_unlock(&i915->gt.timelines.mutex);
intel_engines_sanitize(i915, false); intel_engines_sanitize(i915, false);
@ -942,7 +899,6 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
* context and do not require stop_machine(). * context and do not require stop_machine().
*/ */
intel_engines_reset_default_submission(i915); intel_engines_reset_default_submission(i915);
i915_gem_contexts_lost(i915);
GEM_TRACE("end\n"); GEM_TRACE("end\n");
@ -955,6 +911,52 @@ unlock:
return ret; return ret;
} }
struct __i915_reset {
struct drm_i915_private *i915;
unsigned int stalled_mask;
};
static int __i915_reset__BKL(void *data)
{
struct __i915_reset *arg = data;
int err;
err = intel_gpu_reset(arg->i915, ALL_ENGINES);
if (err)
return err;
return gt_reset(arg->i915, arg->stalled_mask);
}
#if RESET_UNDER_STOP_MACHINE
/*
* XXX An alternative to using stop_machine would be to park only the
* processes that have a GGTT mmap. By remote parking the threads (SIGSTOP)
* we should be able to prevent their memory accesses via the lost fence
* registers over the course of the reset without the potential recursive
* locking of mutexes between the pagefault handler and reset.
*
* See igt/gem_mmap_gtt/hang
*/
#define __do_reset(fn, arg) stop_machine(fn, arg, NULL)
#else
#define __do_reset(fn, arg) fn(arg)
#endif
static int do_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
{
struct __i915_reset arg = { i915, stalled_mask };
int err, i;
err = __do_reset(__i915_reset__BKL, &arg);
for (i = 0; err && i < RESET_MAX_RETRIES; i++) {
msleep(100);
err = __do_reset(__i915_reset__BKL, &arg);
}
return err;
}
/** /**
* i915_reset - reset chip after a hang * i915_reset - reset chip after a hang
* @i915: #drm_i915_private to reset * @i915: #drm_i915_private to reset
@ -980,31 +982,22 @@ void i915_reset(struct drm_i915_private *i915,
{ {
struct i915_gpu_error *error = &i915->gpu_error; struct i915_gpu_error *error = &i915->gpu_error;
int ret; int ret;
int i;
GEM_TRACE("flags=%lx\n", error->flags); GEM_TRACE("flags=%lx\n", error->flags);
might_sleep(); might_sleep();
lockdep_assert_held(&i915->drm.struct_mutex);
assert_rpm_wakelock_held(i915); assert_rpm_wakelock_held(i915);
GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
if (!test_bit(I915_RESET_HANDOFF, &error->flags))
return;
/* Clear any previous failed attempts at recovery. Time to try again. */ /* Clear any previous failed attempts at recovery. Time to try again. */
if (!i915_gem_unset_wedged(i915)) if (!i915_gem_unset_wedged(i915))
goto wakeup; return;
if (reason) if (reason)
dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason); dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);
error->reset_count++; error->reset_count++;
ret = reset_prepare(i915); reset_prepare(i915);
if (ret) {
dev_err(i915->drm.dev, "GPU recovery failed\n");
goto taint;
}
if (!intel_has_gpu_reset(i915)) { if (!intel_has_gpu_reset(i915)) {
if (i915_modparams.reset) if (i915_modparams.reset)
@ -1014,32 +1007,11 @@ void i915_reset(struct drm_i915_private *i915,
goto error; goto error;
} }
for (i = 0; i < 3; i++) { if (do_reset(i915, stalled_mask)) {
ret = intel_gpu_reset(i915, ALL_ENGINES);
if (ret == 0)
break;
msleep(100);
}
if (ret) {
dev_err(i915->drm.dev, "Failed to reset chip\n"); dev_err(i915->drm.dev, "Failed to reset chip\n");
goto taint; goto taint;
} }
/* Ok, now get things going again... */
/*
* Everything depends on having the GTT running, so we need to start
* there.
*/
ret = i915_ggtt_enable_hw(i915);
if (ret) {
DRM_ERROR("Failed to re-enable GGTT following reset (%d)\n",
ret);
goto error;
}
gt_reset(i915, stalled_mask);
intel_overlay_reset(i915); intel_overlay_reset(i915);
/* /*
@ -1061,9 +1033,8 @@ void i915_reset(struct drm_i915_private *i915,
finish: finish:
reset_finish(i915); reset_finish(i915);
wakeup: if (!i915_terminally_wedged(error))
clear_bit(I915_RESET_HANDOFF, &error->flags); reset_restart(i915);
wake_up_bit(&error->flags, I915_RESET_HANDOFF);
return; return;
taint: taint:
@ -1082,7 +1053,6 @@ taint:
add_taint(TAINT_WARN, LOCKDEP_STILL_OK); add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
error: error:
i915_gem_set_wedged(i915); i915_gem_set_wedged(i915);
i915_retire_requests(i915);
goto finish; goto finish;
} }
@ -1108,18 +1078,12 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *i915,
int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
{ {
struct i915_gpu_error *error = &engine->i915->gpu_error; struct i915_gpu_error *error = &engine->i915->gpu_error;
struct i915_request *active_request;
int ret; int ret;
GEM_TRACE("%s flags=%lx\n", engine->name, error->flags); GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
active_request = reset_prepare_engine(engine); reset_prepare_engine(engine);
if (IS_ERR_OR_NULL(active_request)) {
/* Either the previous reset failed, or we pardon the reset. */
ret = PTR_ERR(active_request);
goto out;
}
if (msg) if (msg)
dev_notice(engine->i915->drm.dev, dev_notice(engine->i915->drm.dev,
@ -1143,7 +1107,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
* active request and can drop it, adjust head to skip the offending * active request and can drop it, adjust head to skip the offending
* request to resume executing remaining requests in the queue. * request to resume executing remaining requests in the queue.
*/ */
reset_engine(engine, active_request, true); intel_engine_reset(engine, true);
/* /*
* The engine and its registers (and workarounds in case of render) * The engine and its registers (and workarounds in case of render)
@ -1180,30 +1144,7 @@ static void i915_reset_device(struct drm_i915_private *i915,
i915_wedge_on_timeout(&w, i915, 5 * HZ) { i915_wedge_on_timeout(&w, i915, 5 * HZ) {
intel_prepare_reset(i915); intel_prepare_reset(i915);
error->reason = reason;
error->stalled_mask = engine_mask;
/* Signal that locked waiters should reset the GPU */
smp_mb__before_atomic();
set_bit(I915_RESET_HANDOFF, &error->flags);
wake_up_all(&error->wait_queue);
/*
* Wait for anyone holding the lock to wakeup, without
* blocking indefinitely on struct_mutex.
*/
do {
if (mutex_trylock(&i915->drm.struct_mutex)) {
i915_reset(i915, engine_mask, reason); i915_reset(i915, engine_mask, reason);
mutex_unlock(&i915->drm.struct_mutex);
}
} while (wait_on_bit_timeout(&error->flags,
I915_RESET_HANDOFF,
TASK_UNINTERRUPTIBLE,
1));
error->stalled_mask = 0;
error->reason = NULL;
intel_finish_reset(i915); intel_finish_reset(i915);
} }
@ -1359,6 +1300,25 @@ out:
intel_runtime_pm_put(i915, wakeref); intel_runtime_pm_put(i915, wakeref);
} }
bool i915_reset_flush(struct drm_i915_private *i915)
{
int err;
cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
flush_workqueue(i915->wq);
GEM_BUG_ON(READ_ONCE(i915->gpu_error.restart));
mutex_lock(&i915->drm.struct_mutex);
err = i915_gem_wait_for_idle(i915,
I915_WAIT_LOCKED |
I915_WAIT_FOR_IDLE_BOOST,
MAX_SCHEDULE_TIMEOUT);
mutex_unlock(&i915->drm.struct_mutex);
return !err;
}
static void i915_wedge_me(struct work_struct *work) static void i915_wedge_me(struct work_struct *work)
{ {
struct i915_wedge_me *w = container_of(work, typeof(*w), work.work); struct i915_wedge_me *w = container_of(work, typeof(*w), work.work);


@ -29,6 +29,9 @@ void i915_reset(struct drm_i915_private *i915,
int i915_reset_engine(struct intel_engine_cs *engine, int i915_reset_engine(struct intel_engine_cs *engine,
const char *reason); const char *reason);
void i915_reset_request(struct i915_request *rq, bool guilty);
bool i915_reset_flush(struct drm_i915_private *i915);
bool intel_has_gpu_reset(struct drm_i915_private *i915); bool intel_has_gpu_reset(struct drm_i915_private *i915);
bool intel_has_reset_engine(struct drm_i915_private *i915); bool intel_has_reset_engine(struct drm_i915_private *i915);


@ -127,8 +127,7 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb)
return rb_entry(rb, struct i915_priolist, node); return rb_entry(rb, struct i915_priolist, node);
} }
static void assert_priolists(struct intel_engine_execlists * const execlists, static void assert_priolists(struct intel_engine_execlists * const execlists)
long queue_priority)
{ {
struct rb_node *rb; struct rb_node *rb;
long last_prio, i; long last_prio, i;
@ -139,7 +138,7 @@ static void assert_priolists(struct intel_engine_execlists * const execlists,
GEM_BUG_ON(rb_first_cached(&execlists->queue) != GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
rb_first(&execlists->queue.rb_root)); rb_first(&execlists->queue.rb_root));
last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1; last_prio = (INT_MAX >> I915_USER_PRIORITY_SHIFT) + 1;
for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
const struct i915_priolist *p = to_priolist(rb); const struct i915_priolist *p = to_priolist(rb);
@ -166,7 +165,7 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
int idx, i; int idx, i;
lockdep_assert_held(&engine->timeline.lock); lockdep_assert_held(&engine->timeline.lock);
assert_priolists(execlists, INT_MAX); assert_priolists(execlists);
/* buckets sorted from highest [in slot 0] to lowest priority */ /* buckets sorted from highest [in slot 0] to lowest priority */
idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1; idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1;
@ -239,6 +238,18 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
return engine; return engine;
} }
static bool inflight(const struct i915_request *rq,
const struct intel_engine_cs *engine)
{
const struct i915_request *active;
if (!i915_request_is_active(rq))
return false;
active = port_request(engine->execlists.port);
return active->hw_context == rq->hw_context;
}
static void __i915_schedule(struct i915_request *rq, static void __i915_schedule(struct i915_request *rq,
const struct i915_sched_attr *attr) const struct i915_sched_attr *attr)
{ {
@ -328,6 +339,7 @@ static void __i915_schedule(struct i915_request *rq,
INIT_LIST_HEAD(&dep->dfs_link); INIT_LIST_HEAD(&dep->dfs_link);
engine = sched_lock_engine(node, engine); engine = sched_lock_engine(node, engine);
lockdep_assert_held(&engine->timeline.lock);
/* Recheck after acquiring the engine->timeline.lock */ /* Recheck after acquiring the engine->timeline.lock */
if (prio <= node->attr.priority || node_signaled(node)) if (prio <= node->attr.priority || node_signaled(node))
@ -353,20 +365,19 @@ static void __i915_schedule(struct i915_request *rq,
continue; continue;
} }
if (prio <= engine->execlists.queue_priority) if (prio <= engine->execlists.queue_priority_hint)
continue; continue;
engine->execlists.queue_priority_hint = prio;
/* /*
* If we are already the currently executing context, don't * If we are already the currently executing context, don't
* bother evaluating if we should preempt ourselves. * bother evaluating if we should preempt ourselves.
*/ */
if (node_to_request(node)->global_seqno && if (inflight(node_to_request(node), engine))
i915_seqno_passed(port_request(engine->execlists.port)->global_seqno,
node_to_request(node)->global_seqno))
continue; continue;
/* Defer (tasklet) submission until after all of our updates. */ /* Defer (tasklet) submission until after all of our updates. */
engine->execlists.queue_priority = prio;
tasklet_hi_schedule(&engine->execlists.tasklet); tasklet_hi_schedule(&engine->execlists.tasklet);
} }
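
The scheduling change above replaces the global-seqno test with an explicit inflight() check and a queue_priority_hint: after bumping a request's priority, the tasklet is only kicked if the new priority beats what is already queued and the request is not the one currently executing on the engine. A toy sketch of those two guards follows; every structure and name is invented, and no locking is shown.

#include <stdbool.h>

struct toy_engine {
	int queue_priority_hint;	/* best priority already waiting to run */
	const void *active_context;	/* context currently on the hardware */
	bool kick_pending;
};

struct toy_request {
	int priority;
	const void *context;
	bool active;			/* submitted to the hardware */
};

static bool toy_inflight(const struct toy_request *rq,
			 const struct toy_engine *engine)
{
	return rq->active && rq->context == engine->active_context;
}

static void toy_bump_priority(struct toy_engine *engine,
			      struct toy_request *rq, int prio)
{
	if (rq->priority >= prio)
		return;			/* never lower a priority */
	rq->priority = prio;

	if (prio <= engine->queue_priority_hint)
		return;			/* something at least as urgent is queued */
	engine->queue_priority_hint = prio;

	if (toy_inflight(rq, engine))
		return;			/* already running: nothing to preempt */

	engine->kick_pending = true;	/* stand-in for tasklet_hi_schedule() */
}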


@ -31,6 +31,7 @@ struct i915_selftest {
unsigned long timeout_jiffies; unsigned long timeout_jiffies;
unsigned int timeout_ms; unsigned int timeout_ms;
unsigned int random_seed; unsigned int random_seed;
char *filter;
int mock; int mock;
int live; int live;
}; };


@ -9,25 +9,155 @@
#include "i915_timeline.h" #include "i915_timeline.h"
#include "i915_syncmap.h" #include "i915_syncmap.h"
void i915_timeline_init(struct drm_i915_private *i915, struct i915_timeline_hwsp {
struct i915_timeline *timeline, struct i915_vma *vma;
const char *name) struct list_head free_link;
u64 free_bitmap;
};
static inline struct i915_timeline_hwsp *
i915_timeline_hwsp(const struct i915_timeline *tl)
{ {
lockdep_assert_held(&i915->drm.struct_mutex); return tl->hwsp_ggtt->private;
}
static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915)
{
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
if (IS_ERR(obj))
return ERR_CAST(obj);
i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
if (IS_ERR(vma))
i915_gem_object_put(obj);
return vma;
}
static struct i915_vma *
hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
{
struct drm_i915_private *i915 = timeline->i915;
struct i915_gt_timelines *gt = &i915->gt.timelines;
struct i915_timeline_hwsp *hwsp;
BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE);
spin_lock(&gt->hwsp_lock);
/* hwsp_free_list only contains HWSP that have available cachelines */
hwsp = list_first_entry_or_null(&gt->hwsp_free_list,
typeof(*hwsp), free_link);
if (!hwsp) {
struct i915_vma *vma;
spin_unlock(&gt->hwsp_lock);
hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL);
if (!hwsp)
return ERR_PTR(-ENOMEM);
vma = __hwsp_alloc(i915);
if (IS_ERR(vma)) {
kfree(hwsp);
return vma;
}
vma->private = hwsp;
hwsp->vma = vma;
hwsp->free_bitmap = ~0ull;
spin_lock(&gt->hwsp_lock);
list_add(&hwsp->free_link, &gt->hwsp_free_list);
}
GEM_BUG_ON(!hwsp->free_bitmap);
*cacheline = __ffs64(hwsp->free_bitmap);
hwsp->free_bitmap &= ~BIT_ULL(*cacheline);
if (!hwsp->free_bitmap)
list_del(&hwsp->free_link);
spin_unlock(&gt->hwsp_lock);
GEM_BUG_ON(hwsp->vma->private != hwsp);
return hwsp->vma;
}
static void hwsp_free(struct i915_timeline *timeline)
{
struct i915_gt_timelines *gt = &timeline->i915->gt.timelines;
struct i915_timeline_hwsp *hwsp;
hwsp = i915_timeline_hwsp(timeline);
if (!hwsp) /* leave global HWSP alone! */
return;
spin_lock(&gt->hwsp_lock);
/* As a cacheline becomes available, publish the HWSP on the freelist */
if (!hwsp->free_bitmap)
list_add_tail(&hwsp->free_link, &gt->hwsp_free_list);
hwsp->free_bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
/* And if no one is left using it, give the page back to the system */
if (hwsp->free_bitmap == ~0ull) {
i915_vma_put(hwsp->vma);
list_del(&hwsp->free_link);
kfree(hwsp);
}
spin_unlock(&gt->hwsp_lock);
}
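
hwsp_alloc()/hwsp_free() above implement a small sub-allocator: each status page is carved into 64-byte cachelines and a 64-bit free_bitmap records which are available, with __ffs64() handing out the lowest free slot. Here is a self-contained sketch of the same bookkeeping over an ordinary malloc()ed page; the toy_* names are invented, and the real code of course hands out cachelines of a GGTT-mapped status page rather than heap memory.

#include <stdint.h>
#include <stdlib.h>

#define TOY_PAGE_SIZE	4096
#define TOY_CACHELINE	64
#define TOY_LINES	(TOY_PAGE_SIZE / TOY_CACHELINE)	/* 64 per page */

struct toy_hwsp_page {
	void *page;
	uint64_t free_bitmap;	/* bit set => cacheline available */
};

static struct toy_hwsp_page *toy_hwsp_page_create(void)
{
	struct toy_hwsp_page *p = malloc(sizeof(*p));

	if (!p)
		return NULL;
	p->page = aligned_alloc(TOY_PAGE_SIZE, TOY_PAGE_SIZE);
	if (!p->page) {
		free(p);
		return NULL;
	}
	p->free_bitmap = ~0ull;		/* every cacheline starts free */
	return p;
}

/* Hand out the lowest free cacheline, or NULL if the page is full. */
static void *toy_hwsp_alloc(struct toy_hwsp_page *p, unsigned int *cacheline)
{
	if (!p->free_bitmap)
		return NULL;

	*cacheline = __builtin_ctzll(p->free_bitmap);	/* GCC/Clang builtin, like __ffs64() */
	p->free_bitmap &= ~(1ull << *cacheline);
	return (char *)p->page + *cacheline * TOY_CACHELINE;
}

/* Return a cacheline; the caller frees the page once all 64 are back. */
static void toy_hwsp_free(struct toy_hwsp_page *p, unsigned int cacheline)
{
	p->free_bitmap |= 1ull << cacheline;
}

The driver additionally keeps pages with free cachelines on hwsp_free_list under hwsp_lock, and releases the VMA once every cacheline has been returned, as shown in the hunk above.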
int i915_timeline_init(struct drm_i915_private *i915,
struct i915_timeline *timeline,
const char *name,
struct i915_vma *hwsp)
{
void *vaddr;
/* /*
* Ideally we want a set of engines on a single leaf as we expect * Ideally we want a set of engines on a single leaf as we expect
* to mostly be tracking synchronisation between engines. It is not * to mostly be tracking synchronisation between engines. It is not
* a huge issue if this is not the case, but we may want to mitigate * a huge issue if this is not the case, but we may want to mitigate
* any page crossing penalties if they become an issue. * any page crossing penalties if they become an issue.
*
* Called during early_init before we know how many engines there are.
*/ */
BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES);
timeline->i915 = i915;
timeline->name = name; timeline->name = name;
timeline->pin_count = 0;
timeline->has_initial_breadcrumb = !hwsp;
list_add(&timeline->link, &i915->gt.timelines); timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
if (!hwsp) {
unsigned int cacheline;
/* Called during early_init before we know how many engines there are */ hwsp = hwsp_alloc(timeline, &cacheline);
if (IS_ERR(hwsp))
return PTR_ERR(hwsp);
timeline->hwsp_offset = cacheline * CACHELINE_BYTES;
}
timeline->hwsp_ggtt = i915_vma_get(hwsp);
vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
if (IS_ERR(vaddr)) {
hwsp_free(timeline);
i915_vma_put(hwsp);
return PTR_ERR(vaddr);
}
timeline->hwsp_seqno =
memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES);
timeline->fence_context = dma_fence_context_alloc(1); timeline->fence_context = dma_fence_context_alloc(1);
@ -37,6 +167,40 @@ void i915_timeline_init(struct drm_i915_private *i915,
INIT_LIST_HEAD(&timeline->requests); INIT_LIST_HEAD(&timeline->requests);
i915_syncmap_init(&timeline->sync); i915_syncmap_init(&timeline->sync);
return 0;
}
void i915_timelines_init(struct drm_i915_private *i915)
{
struct i915_gt_timelines *gt = &i915->gt.timelines;
mutex_init(&gt->mutex);
INIT_LIST_HEAD(&gt->active_list);
spin_lock_init(&gt->hwsp_lock);
INIT_LIST_HEAD(&gt->hwsp_free_list);
/* via i915_gem_wait_for_idle() */
i915_gem_shrinker_taints_mutex(i915, &gt->mutex);
}
static void timeline_add_to_active(struct i915_timeline *tl)
{
struct i915_gt_timelines *gt = &tl->i915->gt.timelines;
mutex_lock(&gt->mutex);
list_add(&tl->link, &gt->active_list);
mutex_unlock(&gt->mutex);
}
static void timeline_remove_from_active(struct i915_timeline *tl)
{
struct i915_gt_timelines *gt = &tl->i915->gt.timelines;
mutex_lock(&gt->mutex);
list_del(&tl->link);
mutex_unlock(&gt->mutex);
} }
/** /**
@ -51,11 +215,11 @@ void i915_timeline_init(struct drm_i915_private *i915,
*/ */
void i915_timelines_park(struct drm_i915_private *i915) void i915_timelines_park(struct drm_i915_private *i915)
{ {
struct i915_gt_timelines *gt = &i915->gt.timelines;
struct i915_timeline *timeline; struct i915_timeline *timeline;
lockdep_assert_held(&i915->drm.struct_mutex); mutex_lock(&gt->mutex);
list_for_each_entry(timeline, &gt->active_list, link) {
list_for_each_entry(timeline, &i915->gt.timelines, link) {
/* /*
* All known fences are completed so we can scrap * All known fences are completed so we can scrap
* the current sync point tracking and start afresh, * the current sync point tracking and start afresh,
@ -64,32 +228,87 @@ void i915_timelines_park(struct drm_i915_private *i915)
*/ */
i915_syncmap_free(&timeline->sync); i915_syncmap_free(&timeline->sync);
} }
mutex_unlock(&gt->mutex);
} }
void i915_timeline_fini(struct i915_timeline *timeline) void i915_timeline_fini(struct i915_timeline *timeline)
{ {
GEM_BUG_ON(timeline->pin_count);
GEM_BUG_ON(!list_empty(&timeline->requests)); GEM_BUG_ON(!list_empty(&timeline->requests));
i915_syncmap_free(&timeline->sync); i915_syncmap_free(&timeline->sync);
hwsp_free(timeline);
list_del(&timeline->link); i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
i915_vma_put(timeline->hwsp_ggtt);
} }
struct i915_timeline * struct i915_timeline *
i915_timeline_create(struct drm_i915_private *i915, const char *name) i915_timeline_create(struct drm_i915_private *i915,
const char *name,
struct i915_vma *global_hwsp)
{ {
struct i915_timeline *timeline; struct i915_timeline *timeline;
int err;
timeline = kzalloc(sizeof(*timeline), GFP_KERNEL); timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
if (!timeline) if (!timeline)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
i915_timeline_init(i915, timeline, name); err = i915_timeline_init(i915, timeline, name, global_hwsp);
if (err) {
kfree(timeline);
return ERR_PTR(err);
}
kref_init(&timeline->kref); kref_init(&timeline->kref);
return timeline; return timeline;
} }
int i915_timeline_pin(struct i915_timeline *tl)
{
int err;
if (tl->pin_count++)
return 0;
GEM_BUG_ON(!tl->pin_count);
err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
if (err)
goto unpin;
tl->hwsp_offset =
i915_ggtt_offset(tl->hwsp_ggtt) +
offset_in_page(tl->hwsp_offset);
timeline_add_to_active(tl);
return 0;
unpin:
tl->pin_count = 0;
return err;
}
void i915_timeline_unpin(struct i915_timeline *tl)
{
GEM_BUG_ON(!tl->pin_count);
if (--tl->pin_count)
return;
timeline_remove_from_active(tl);
/*
* Since this timeline is idle, all barriers upon which we were waiting
* must also be complete and so we can discard the last used barriers
* without loss of information.
*/
i915_syncmap_free(&tl->sync);
__i915_vma_unpin(tl->hwsp_ggtt);
}
void __i915_timeline_free(struct kref *kref) void __i915_timeline_free(struct kref *kref)
{ {
struct i915_timeline *timeline = struct i915_timeline *timeline =
@ -99,6 +318,16 @@ void __i915_timeline_free(struct kref *kref)
kfree(timeline); kfree(timeline);
} }
void i915_timelines_fini(struct drm_i915_private *i915)
{
struct i915_gt_timelines *gt = &i915->gt.timelines;
GEM_BUG_ON(!list_empty(&gt->active_list));
GEM_BUG_ON(!list_empty(&gt->hwsp_free_list));
mutex_destroy(&gt->mutex);
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_timeline.c" #include "selftests/mock_timeline.c"
#include "selftests/i915_timeline.c" #include "selftests/i915_timeline.c"


@ -32,6 +32,9 @@
#include "i915_syncmap.h" #include "i915_syncmap.h"
#include "i915_utils.h" #include "i915_utils.h"
struct i915_vma;
struct i915_timeline_hwsp;
struct i915_timeline { struct i915_timeline {
u64 fence_context; u64 fence_context;
u32 seqno; u32 seqno;
@ -40,6 +43,13 @@ struct i915_timeline {
#define TIMELINE_CLIENT 0 /* default subclass */ #define TIMELINE_CLIENT 0 /* default subclass */
#define TIMELINE_ENGINE 1 #define TIMELINE_ENGINE 1
unsigned int pin_count;
const u32 *hwsp_seqno;
struct i915_vma *hwsp_ggtt;
u32 hwsp_offset;
bool has_initial_breadcrumb;
/** /**
* List of breadcrumbs associated with GPU requests currently * List of breadcrumbs associated with GPU requests currently
* outstanding. * outstanding.
@ -66,13 +76,15 @@ struct i915_timeline {
struct list_head link; struct list_head link;
const char *name; const char *name;
struct drm_i915_private *i915;
struct kref kref; struct kref kref;
}; };
void i915_timeline_init(struct drm_i915_private *i915, int i915_timeline_init(struct drm_i915_private *i915,
struct i915_timeline *tl, struct i915_timeline *tl,
const char *name); const char *name,
struct i915_vma *hwsp);
void i915_timeline_fini(struct i915_timeline *tl); void i915_timeline_fini(struct i915_timeline *tl);
static inline void static inline void
@ -95,7 +107,9 @@ i915_timeline_set_subclass(struct i915_timeline *timeline,
} }
struct i915_timeline * struct i915_timeline *
i915_timeline_create(struct drm_i915_private *i915, const char *name); i915_timeline_create(struct drm_i915_private *i915,
const char *name,
struct i915_vma *global_hwsp);
static inline struct i915_timeline * static inline struct i915_timeline *
i915_timeline_get(struct i915_timeline *timeline) i915_timeline_get(struct i915_timeline *timeline)
@ -134,6 +148,11 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno); return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno);
} }
int i915_timeline_pin(struct i915_timeline *tl);
void i915_timeline_unpin(struct i915_timeline *tl);
void i915_timelines_init(struct drm_i915_private *i915);
void i915_timelines_park(struct drm_i915_private *i915); void i915_timelines_park(struct drm_i915_private *i915);
void i915_timelines_fini(struct drm_i915_private *i915);
#endif #endif


@ -752,31 +752,6 @@ trace_i915_request_out(struct i915_request *rq)
#endif #endif
#endif #endif
TRACE_EVENT(intel_engine_notify,
TP_PROTO(struct intel_engine_cs *engine, bool waiters),
TP_ARGS(engine, waiters),
TP_STRUCT__entry(
__field(u32, dev)
__field(u16, class)
__field(u16, instance)
__field(u32, seqno)
__field(bool, waiters)
),
TP_fast_assign(
__entry->dev = engine->i915->drm.primary->index;
__entry->class = engine->uabi_class;
__entry->instance = engine->instance;
__entry->seqno = intel_engine_get_seqno(engine);
__entry->waiters = waiters;
),
TP_printk("dev=%u, engine=%u:%u, seqno=%u, waiters=%u",
__entry->dev, __entry->class, __entry->instance,
__entry->seqno, __entry->waiters)
);
DEFINE_EVENT(i915_request, i915_request_retire, DEFINE_EVENT(i915_request, i915_request_retire,
TP_PROTO(struct i915_request *rq), TP_PROTO(struct i915_request *rq),
TP_ARGS(rq) TP_ARGS(rq)


@ -79,9 +79,6 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
if (--vma->active_count) if (--vma->active_count)
return; return;
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
GEM_BUG_ON(!i915_gem_object_is_active(obj)); GEM_BUG_ON(!i915_gem_object_is_active(obj));
if (--obj->active_count) if (--obj->active_count)
return; return;
@ -190,33 +187,56 @@ vma_create(struct drm_i915_gem_object *obj,
i915_gem_object_get_stride(obj)); i915_gem_object_get_stride(obj));
GEM_BUG_ON(!is_power_of_2(vma->fence_alignment)); GEM_BUG_ON(!is_power_of_2(vma->fence_alignment));
vma->flags |= I915_VMA_GGTT;
}
spin_lock(&obj->vma.lock);
rb = NULL;
p = &obj->vma.tree.rb_node;
while (*p) {
struct i915_vma *pos;
long cmp;
rb = *p;
pos = rb_entry(rb, struct i915_vma, obj_node);
/*
* If the view already exists in the tree, another thread
* already created a matching vma, so return the older instance
* and dispose of ours.
*/
cmp = i915_vma_compare(pos, vm, view);
if (cmp == 0) {
spin_unlock(&obj->vma.lock);
kmem_cache_free(vm->i915->vmas, vma);
return pos;
}
if (cmp < 0)
p = &rb->rb_right;
else
p = &rb->rb_left;
}
rb_link_node(&vma->obj_node, rb, p);
rb_insert_color(&vma->obj_node, &obj->vma.tree);
if (i915_vma_is_ggtt(vma))
/*
* We put the GGTT vma at the start of the vma-list, followed
* by the ppGGTT vma. This allows us to break early when
* iterating over only the GGTT vma for an object, see
* for_each_ggtt_vma()
*/
vma->flags |= I915_VMA_GGTT;
list_add(&vma->obj_link, &obj->vma_list);
} else {
list_add_tail(&vma->obj_link, &obj->vma_list);
}
rb = NULL;
p = &obj->vma_tree.rb_node;
while (*p) {
struct i915_vma *pos;
rb = *p;
pos = rb_entry(rb, struct i915_vma, obj_node);
if (i915_vma_compare(pos, vm, view) < 0)
p = &rb->rb_right;
else
p = &rb->rb_left;
}
rb_link_node(&vma->obj_node, rb, p);
rb_insert_color(&vma->obj_node, &obj->vma_tree);
list_add(&vma->obj_link, &obj->vma.list);
else
list_add_tail(&vma->obj_link, &obj->vma.list);
spin_unlock(&obj->vma.lock);
mutex_lock(&vm->mutex);
list_add(&vma->vm_link, &vm->unbound_list);
mutex_unlock(&vm->mutex);
return vma;
@ -232,7 +252,7 @@ vma_lookup(struct drm_i915_gem_object *obj,
{ {
struct rb_node *rb; struct rb_node *rb;
rb = obj->vma_tree.rb_node;
rb = obj->vma.tree.rb_node;
while (rb) { while (rb) {
struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node); struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node);
long cmp; long cmp;
@ -272,16 +292,18 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
{ {
struct i915_vma *vma; struct i915_vma *vma;
lockdep_assert_held(&obj->base.dev->struct_mutex);
GEM_BUG_ON(view && !i915_is_ggtt(vm));
GEM_BUG_ON(vm->closed);
spin_lock(&obj->vma.lock);
vma = vma_lookup(obj, vm, view);
spin_unlock(&obj->vma.lock);
/* vma_create() will resolve the race if another creates the vma */
if (!vma)
if (unlikely(!vma))
vma = vma_create(obj, vm, view);
GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view));
GEM_BUG_ON(!IS_ERR(vma) && vma_lookup(obj, vm, view) != vma);
return vma; return vma;
} }
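The lookup under obj->vma.lock followed by an unlocked vma_create() above only stays race-free because vma_create() re-walks the tree under the same lock and hands back the instance that won the race. A minimal standalone sketch of that lookup-then-create pattern, with hypothetical names and a plain mutex plus sorted list standing in for the vma rbtree (not the i915 code itself):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for the per-object vma tree: a list keyed by id. */
struct node { int key; struct node *next; };

static struct node *head;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* Lookup must be called with the lock held; may return NULL. */
static struct node *lookup(int key)
{
    struct node *n;

    for (n = head; n; n = n->next)
        if (n->key == key)
            return n;
    return NULL;
}

/* Create resolves the race: re-check under the lock before inserting. */
static struct node *get_or_create(int key)
{
    struct node *n, *fresh;

    pthread_mutex_lock(&lock);
    n = lookup(key);
    pthread_mutex_unlock(&lock);
    if (n)
        return n;

    fresh = malloc(sizeof(*fresh));
    fresh->key = key;

    pthread_mutex_lock(&lock);
    n = lookup(key);            /* somebody else may have created it */
    if (n) {
        free(fresh);            /* dispose of ours, return the older instance */
    } else {
        fresh->next = head;
        head = fresh;
        n = fresh;
    }
    pthread_mutex_unlock(&lock);
    return n;
}

int main(void)
{
    printf("%d\n", get_or_create(42)->key);
    return 0;
}

Build with gcc -pthread; the second locked lookup is what lets the allocation happen outside the lock without ever inserting a duplicate.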
@ -659,7 +681,9 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level)); GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level));
list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
mutex_lock(&vma->vm->mutex);
list_move_tail(&vma->vm_link, &vma->vm->bound_list);
mutex_unlock(&vma->vm->mutex);
if (vma->obj) { if (vma->obj) {
struct drm_i915_gem_object *obj = vma->obj; struct drm_i915_gem_object *obj = vma->obj;
@ -692,8 +716,10 @@ i915_vma_remove(struct i915_vma *vma)
vma->ops->clear_pages(vma); vma->ops->clear_pages(vma);
mutex_lock(&vma->vm->mutex);
drm_mm_remove_node(&vma->node); drm_mm_remove_node(&vma->node);
list_move_tail(&vma->vm_link, &vma->vm->unbound_list); list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
mutex_unlock(&vma->vm->mutex);
/* /*
* Since the unbound list is global, only move to that list if * Since the unbound list is global, only move to that list if
@ -804,10 +830,18 @@ static void __i915_vma_destroy(struct i915_vma *vma)
GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence)); GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
list_del(&vma->obj_link);
list_del(&vma->vm_link);
if (vma->obj)
rb_erase(&vma->obj_node, &vma->obj->vma_tree);
mutex_lock(&vma->vm->mutex);
list_del(&vma->vm_link);
mutex_unlock(&vma->vm->mutex);
if (vma->obj) {
struct drm_i915_gem_object *obj = vma->obj;
spin_lock(&obj->vma.lock);
list_del(&vma->obj_link);
rb_erase(&vma->obj_node, &vma->obj->vma.tree);
spin_unlock(&obj->vma.lock);
}
rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) { rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {
GEM_BUG_ON(i915_gem_active_isset(&iter->base)); GEM_BUG_ON(i915_gem_active_isset(&iter->base));
@ -1003,10 +1037,8 @@ int i915_vma_move_to_active(struct i915_vma *vma,
* add the active reference first and queue for it to be dropped * add the active reference first and queue for it to be dropped
* *last*. * *last*.
*/ */
if (!i915_gem_active_isset(active) && !vma->active_count++) {
list_move_tail(&vma->vm_link, &vma->vm->active_list);
obj->active_count++;
}
if (!i915_gem_active_isset(active) && !vma->active_count++)
obj->active_count++;
i915_gem_active_set(active, rq); i915_gem_active_set(active, rq);
GEM_BUG_ON(!i915_vma_is_active(vma)); GEM_BUG_ON(!i915_vma_is_active(vma));
GEM_BUG_ON(!obj->active_count); GEM_BUG_ON(!obj->active_count);

View file

@ -71,29 +71,42 @@ struct i915_vma {
unsigned int open_count; unsigned int open_count;
unsigned long flags; unsigned long flags;
/**
* How many users have pinned this object in GTT space. The following
* users can each hold at most one reference: pwrite/pread, execbuffer
* (objects are not allowed multiple times for the same batchbuffer),
* and the framebuffer code. When switching/pageflipping, the
* framebuffer code has at most two buffers pinned per crtc.
*
* In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
* bits with absolutely no headroom. So use 4 bits.
*/
/**
* How many users have pinned this object in GTT space.
*
* This is a tightly bound, fairly small number of users, so we
* stuff inside the flags field so that we can both check for overflow
* and detect a no-op i915_vma_pin() in a single check, while also
* pinning the vma.
*
* The worst case display setup would have the same vma pinned for
* use on each plane on each crtc, while also building the next atomic
* state and holding a pin for the length of the cleanup queue. In the
* future, the flip queue may be increased from 1.
* Estimated worst case: 3 [qlen] * 4 [max crtcs] * 7 [max planes] = 84
*
* For GEM, the number of concurrent users for pwrite/pread is
* unbounded. For execbuffer, it is currently one but will in future
* be extended to allow multiple clients to pin vma concurrently.
*
* We also use suballocated pages, with each suballocation claiming
* its own pin on the shared vma. At present, this is limited to
* exclusive cachelines of a single page, so a maximum of 64 possible
* users.
*/
#define I915_VMA_PIN_MASK 0xf
#define I915_VMA_PIN_OVERFLOW BIT(5)
/** Flags and address space this VMA is bound to */
#define I915_VMA_GLOBAL_BIND BIT(6)
#define I915_VMA_LOCAL_BIND BIT(7)
#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW)
#define I915_VMA_GGTT BIT(8)
#define I915_VMA_CAN_FENCE BIT(9)
#define I915_VMA_CLOSED BIT(10)
#define I915_VMA_USERFAULT_BIT 11
#define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT)
#define I915_VMA_GGTT_WRITE BIT(12)
#define I915_VMA_PIN_MASK 0xff
#define I915_VMA_PIN_OVERFLOW BIT(8)
/** Flags and address space this VMA is bound to */
#define I915_VMA_GLOBAL_BIND BIT(9)
#define I915_VMA_LOCAL_BIND BIT(10)
#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW)
#define I915_VMA_GGTT BIT(11)
#define I915_VMA_CAN_FENCE BIT(12)
#define I915_VMA_CLOSED BIT(13)
#define I915_VMA_USERFAULT_BIT 14
#define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT)
#define I915_VMA_GGTT_WRITE BIT(15)
unsigned int active_count; unsigned int active_count;
struct rb_root active; struct rb_root active;
@ -425,7 +438,7 @@ void i915_vma_parked(struct drm_i915_private *i915);
* or the list is empty ofc. * or the list is empty ofc.
*/ */
#define for_each_ggtt_vma(V, OBJ) \ #define for_each_ggtt_vma(V, OBJ) \
list_for_each_entry(V, &(OBJ)->vma_list, obj_link) \
list_for_each_entry(V, &(OBJ)->vma.list, obj_link) \
for_each_until(!i915_vma_is_ggtt(V)) for_each_until(!i915_vma_is_ggtt(V))
#endif #endif
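The comment and the widened I915_VMA_PIN_MASK above pack the pin count into the low bits of the flags word, so a single addition both pins and exposes overflow. A standalone sketch of that packing, with made-up names, bit positions and behaviour rather than the real i915 ones:

#include <stdio.h>

#define PIN_MASK     0xffu          /* low byte holds the pin count */
#define PIN_OVERFLOW (1u << 8)      /* becomes set when the count wraps */
#define FLAG_GGTT    (1u << 9)      /* an unrelated flag sharing the word */

/* One addition bumps the count and, via the carry, raises the overflow
 * bit; callers are expected to stop pinning once that has happened. */
static int pin_get(unsigned int *flags)
{
    *flags += 1;
    if (*flags & PIN_OVERFLOW)
        return -1;
    return (int)(*flags & PIN_MASK);
}

int main(void)
{
    unsigned int flags = FLAG_GGTT;
    int i;

    for (i = 0; i < 84; i++)    /* the comment's estimated display worst case */
        pin_get(&flags);

    printf("count=%u overflow=%d ggtt=%d\n",
           flags & PIN_MASK,
           !!(flags & PIN_OVERFLOW),
           !!(flags & FLAG_GGTT));
    return 0;
}

Keeping the count and the flags in one word is what lets a pin, the overflow check and the "already bound" test all happen on a single read-modify-write.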

View file

@ -110,41 +110,39 @@ intel_plane_destroy_state(struct drm_plane *plane,
} }
int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state,
struct intel_crtc_state *crtc_state,
const struct intel_plane_state *old_plane_state,
struct intel_plane_state *intel_state)
{
struct drm_plane *plane = intel_state->base.plane;
struct drm_plane_state *state = &intel_state->base;
struct intel_plane *intel_plane = to_intel_plane(plane);
int ret;
crtc_state->active_planes &= ~BIT(intel_plane->id);
crtc_state->nv12_planes &= ~BIT(intel_plane->id);
intel_state->base.visible = false;
/* If this is a cursor plane, no further checks are needed. */
if (!intel_state->base.crtc && !old_plane_state->base.crtc)
return 0;
ret = intel_plane->check_plane(crtc_state, intel_state);
if (ret)
return ret;
/* FIXME pre-g4x don't work like this */
if (state->visible)
crtc_state->active_planes |= BIT(intel_plane->id);
if (state->visible && state->fb->format->format == DRM_FORMAT_NV12)
crtc_state->nv12_planes |= BIT(intel_plane->id);
if (state->visible || old_plane_state->base.visible)
crtc_state->update_planes |= BIT(intel_plane->id);
return intel_plane_atomic_calc_changes(old_crtc_state,
&crtc_state->base,
old_plane_state,
state);
}
int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state,
struct intel_crtc_state *new_crtc_state,
const struct intel_plane_state *old_plane_state,
struct intel_plane_state *new_plane_state)
{
struct intel_plane *plane = to_intel_plane(new_plane_state->base.plane);
int ret;
new_crtc_state->active_planes &= ~BIT(plane->id);
new_crtc_state->nv12_planes &= ~BIT(plane->id);
new_plane_state->base.visible = false;
if (!new_plane_state->base.crtc && !old_plane_state->base.crtc)
return 0;
ret = plane->check_plane(new_crtc_state, new_plane_state);
if (ret)
return ret;
/* FIXME pre-g4x don't work like this */
if (new_plane_state->base.visible)
new_crtc_state->active_planes |= BIT(plane->id);
if (new_plane_state->base.visible &&
new_plane_state->base.fb->format->format == DRM_FORMAT_NV12)
new_crtc_state->nv12_planes |= BIT(plane->id);
if (new_plane_state->base.visible || old_plane_state->base.visible)
new_crtc_state->update_planes |= BIT(plane->id);
return intel_plane_atomic_calc_changes(old_crtc_state,
&new_crtc_state->base,
old_plane_state,
&new_plane_state->base);
}
static int intel_plane_atomic_check(struct drm_plane *plane, static int intel_plane_atomic_check(struct drm_plane *plane,

View file

@ -1663,6 +1663,13 @@ init_vbt_missing_defaults(struct drm_i915_private *dev_priv)
struct ddi_vbt_port_info *info = struct ddi_vbt_port_info *info =
&dev_priv->vbt.ddi_port_info[port]; &dev_priv->vbt.ddi_port_info[port];
/*
* VBT has the TypeC mode (native,TBT/USB) and we don't want
* to detect it.
*/
if (intel_port_is_tc(dev_priv, port))
continue;
info->supports_dvi = (port != PORT_A && port != PORT_E); info->supports_dvi = (port != PORT_A && port != PORT_E);
info->supports_hdmi = info->supports_dvi; info->supports_hdmi = info->supports_dvi;
info->supports_dp = (port != PORT_E); info->supports_dp = (port != PORT_E);

View file

@ -29,133 +29,6 @@
#define task_asleep(tsk) ((tsk)->state & TASK_NORMAL && !(tsk)->on_rq) #define task_asleep(tsk) ((tsk)->state & TASK_NORMAL && !(tsk)->on_rq)
static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b)
{
struct intel_wait *wait;
unsigned int result = 0;
lockdep_assert_held(&b->irq_lock);
wait = b->irq_wait;
if (wait) {
/*
* N.B. Since task_asleep() and ttwu are not atomic, the
* waiter may actually go to sleep after the check, causing
* us to suppress a valid wakeup. We prefer to reduce the
* number of false positive missed_breadcrumb() warnings
* at the expense of a few false negatives, as it is easy
* to trigger a false positive under heavy load. Enough
* signal should remain from genuine missed_breadcrumb()
* for us to detect in CI.
*/
bool was_asleep = task_asleep(wait->tsk);
result = ENGINE_WAKEUP_WAITER;
if (wake_up_process(wait->tsk) && was_asleep)
result |= ENGINE_WAKEUP_ASLEEP;
}
return result;
}
unsigned int intel_engine_wakeup(struct intel_engine_cs *engine)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
unsigned long flags;
unsigned int result;
spin_lock_irqsave(&b->irq_lock, flags);
result = __intel_breadcrumbs_wakeup(b);
spin_unlock_irqrestore(&b->irq_lock, flags);
return result;
}
static unsigned long wait_timeout(void)
{
return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES);
}
static noinline void missed_breadcrumb(struct intel_engine_cs *engine)
{
if (GEM_SHOW_DEBUG()) {
struct drm_printer p = drm_debug_printer(__func__);
intel_engine_dump(engine, &p,
"%s missed breadcrumb at %pS\n",
engine->name, __builtin_return_address(0));
}
set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
}
static void intel_breadcrumbs_hangcheck(struct timer_list *t)
{
struct intel_engine_cs *engine =
from_timer(engine, t, breadcrumbs.hangcheck);
struct intel_breadcrumbs *b = &engine->breadcrumbs;
unsigned int irq_count;
if (!b->irq_armed)
return;
irq_count = READ_ONCE(b->irq_count);
if (b->hangcheck_interrupts != irq_count) {
b->hangcheck_interrupts = irq_count;
mod_timer(&b->hangcheck, wait_timeout());
return;
}
/* We keep the hangcheck timer alive until we disarm the irq, even
* if there are no waiters at present.
*
* If the waiter was currently running, assume it hasn't had a chance
* to process the pending interrupt (e.g, low priority task on a loaded
* system) and wait until it sleeps before declaring a missed interrupt.
*
* If the waiter was asleep (and not even pending a wakeup), then we
* must have missed an interrupt as the GPU has stopped advancing
* but we still have a waiter. Assuming all batches complete within
* DRM_I915_HANGCHECK_JIFFIES [1.5s]!
*/
if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) {
missed_breadcrumb(engine);
mod_timer(&b->fake_irq, jiffies + 1);
} else {
mod_timer(&b->hangcheck, wait_timeout());
}
}
static void intel_breadcrumbs_fake_irq(struct timer_list *t)
{
struct intel_engine_cs *engine =
from_timer(engine, t, breadcrumbs.fake_irq);
struct intel_breadcrumbs *b = &engine->breadcrumbs;
/*
* The timer persists in case we cannot enable interrupts,
* or if we have previously seen seqno/interrupt incoherency
* ("missed interrupt" syndrome, better known as a "missed breadcrumb").
* Here the worker will wake up every jiffie in order to kick the
* oldest waiter to do the coherent seqno check.
*/
spin_lock_irq(&b->irq_lock);
if (b->irq_armed && !__intel_breadcrumbs_wakeup(b))
__intel_engine_disarm_breadcrumbs(engine);
spin_unlock_irq(&b->irq_lock);
if (!b->irq_armed)
return;
/* If the user has disabled the fake-irq, restore the hangchecking */
if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) {
mod_timer(&b->hangcheck, wait_timeout());
return;
}
mod_timer(&b->fake_irq, jiffies + 1);
}
static void irq_enable(struct intel_engine_cs *engine) static void irq_enable(struct intel_engine_cs *engine)
{ {
if (!engine->irq_enable) if (!engine->irq_enable)
@ -178,21 +51,126 @@ static void irq_disable(struct intel_engine_cs *engine)
spin_unlock(&engine->i915->irq_lock); spin_unlock(&engine->i915->irq_lock);
} }
void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
lockdep_assert_held(&b->irq_lock);
GEM_BUG_ON(b->irq_wait);
GEM_BUG_ON(!b->irq_armed);
GEM_BUG_ON(!b->irq_enabled);
if (!--b->irq_enabled)
irq_disable(engine);
irq_disable(container_of(b,
struct intel_engine_cs,
breadcrumbs));
b->irq_armed = false;
}
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
if (!b->irq_armed)
return;
spin_lock_irq(&b->irq_lock);
if (b->irq_armed)
__intel_breadcrumbs_disarm_irq(b);
spin_unlock_irq(&b->irq_lock);
}
static inline bool __request_completed(const struct i915_request *rq)
{
return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);
}
bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
struct intel_context *ce, *cn;
struct list_head *pos, *next;
LIST_HEAD(signal);
spin_lock(&b->irq_lock);
if (b->irq_armed && list_empty(&b->signalers))
__intel_breadcrumbs_disarm_irq(b);
list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
GEM_BUG_ON(list_empty(&ce->signals));
list_for_each_safe(pos, next, &ce->signals) {
struct i915_request *rq =
list_entry(pos, typeof(*rq), signal_link);
if (!__request_completed(rq))
break;
GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL,
&rq->fence.flags));
clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
/*
* We may race with direct invocation of
* dma_fence_signal(), e.g. i915_request_retire(),
* in which case we can skip processing it ourselves.
*/
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
&rq->fence.flags))
continue;
/*
* Queue for execution after dropping the signaling
* spinlock as the callback chain may end up adding
* more signalers to the same context or engine.
*/
i915_request_get(rq);
list_add_tail(&rq->signal_link, &signal);
}
/*
* We process the list deletion in bulk, only using a list_add
* (not list_move) above but keeping the status of
* rq->signal_link known with the I915_FENCE_FLAG_SIGNAL bit.
*/
if (!list_is_first(pos, &ce->signals)) {
/* Advance the list to the first incomplete request */
__list_del_many(&ce->signals, pos);
if (&ce->signals == pos) /* now empty */
list_del_init(&ce->signal_link);
}
}
spin_unlock(&b->irq_lock);
list_for_each_safe(pos, next, &signal) {
struct i915_request *rq =
list_entry(pos, typeof(*rq), signal_link);
dma_fence_signal(&rq->fence);
i915_request_put(rq);
}
return !list_empty(&signal);
}
bool intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
{
bool result;
local_irq_disable();
result = intel_engine_breadcrumbs_irq(engine);
local_irq_enable();
return result;
}
static void signal_irq_work(struct irq_work *work)
{
struct intel_engine_cs *engine =
container_of(work, typeof(*engine), breadcrumbs.irq_work);
intel_engine_breadcrumbs_irq(engine);
}
void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine) void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine)
{ {
struct intel_breadcrumbs *b = &engine->breadcrumbs; struct intel_breadcrumbs *b = &engine->breadcrumbs;
@ -215,77 +193,14 @@ void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine)
spin_unlock_irq(&b->irq_lock); spin_unlock_irq(&b->irq_lock);
} }
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
struct intel_wait *wait, *n;
if (!b->irq_armed)
return;
/*
* We only disarm the irq when we are idle (all requests completed),
* so if the bottom-half remains asleep, it missed the request
* completion.
*/
if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP)
missed_breadcrumb(engine);
spin_lock_irq(&b->rb_lock);
spin_lock(&b->irq_lock);
b->irq_wait = NULL;
if (b->irq_armed)
__intel_engine_disarm_breadcrumbs(engine);
spin_unlock(&b->irq_lock);
rbtree_postorder_for_each_entry_safe(wait, n, &b->waiters, node) {
GEM_BUG_ON(!intel_engine_signaled(engine, wait->seqno));
RB_CLEAR_NODE(&wait->node);
wake_up_process(wait->tsk);
}
b->waiters = RB_ROOT;
spin_unlock_irq(&b->rb_lock);
}
static bool use_fake_irq(const struct intel_breadcrumbs *b)
{
const struct intel_engine_cs *engine =
container_of(b, struct intel_engine_cs, breadcrumbs);
if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings))
return false;
/*
* Only start with the heavy weight fake irq timer if we have not
* seen any interrupts since enabling it the first time. If the
* interrupts are still arriving, it means we made a mistake in our
* engine->seqno_barrier(), a timing error that should be transient
* and unlikely to reoccur.
*/
return READ_ONCE(b->irq_count) == b->hangcheck_interrupts;
}
static void enable_fake_irq(struct intel_breadcrumbs *b)
{
/* Ensure we never sleep indefinitely */
if (!b->irq_enabled || use_fake_irq(b))
mod_timer(&b->fake_irq, jiffies + 1);
else
mod_timer(&b->hangcheck, wait_timeout());
}
static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
{ {
struct intel_engine_cs *engine = struct intel_engine_cs *engine =
container_of(b, struct intel_engine_cs, breadcrumbs); container_of(b, struct intel_engine_cs, breadcrumbs);
struct drm_i915_private *i915 = engine->i915;
bool enabled;
lockdep_assert_held(&b->irq_lock); lockdep_assert_held(&b->irq_lock);
if (b->irq_armed) if (b->irq_armed)
return false; return;
/* /*
* The breadcrumb irq will be disarmed on the interrupt after the * The breadcrumb irq will be disarmed on the interrupt after the
@ -303,509 +218,18 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
* the driver is idle) we disarm the breadcrumbs. * the driver is idle) we disarm the breadcrumbs.
*/ */
/* No interrupts? Kick the waiter every jiffie! */ if (!b->irq_enabled++)
enabled = false;
if (!b->irq_enabled++ &&
!test_bit(engine->id, &i915->gpu_error.test_irq_rings)) {
irq_enable(engine); irq_enable(engine);
enabled = true;
} }
enable_fake_irq(b); void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
return enabled;
}
static inline struct intel_wait *to_wait(struct rb_node *node)
{
return rb_entry(node, struct intel_wait, node);
}
static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
struct intel_wait *wait)
{
lockdep_assert_held(&b->rb_lock);
GEM_BUG_ON(b->irq_wait == wait);
/*
* This request is completed, so remove it from the tree, mark it as
* complete, and *then* wake up the associated task. N.B. when the
* task wakes up, it will find the empty rb_node, discern that it
* has already been removed from the tree and skip the serialisation
* of the b->rb_lock and b->irq_lock. This means that the destruction
* of the intel_wait is not serialised with the interrupt handler
* by the waiter - it must instead be serialised by the caller.
*/
rb_erase(&wait->node, &b->waiters);
RB_CLEAR_NODE(&wait->node);
if (wait->tsk->state != TASK_RUNNING)
wake_up_process(wait->tsk); /* implicit smp_wmb() */
}
static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine,
struct rb_node *next)
{ {
struct intel_breadcrumbs *b = &engine->breadcrumbs; struct intel_breadcrumbs *b = &engine->breadcrumbs;
spin_lock(&b->irq_lock);
GEM_BUG_ON(!b->irq_armed);
GEM_BUG_ON(!b->irq_wait);
b->irq_wait = to_wait(next);
spin_unlock(&b->irq_lock);
/* We always wake up the next waiter that takes over as the bottom-half
* as we may delegate not only the irq-seqno barrier to the next waiter
* but also the task of waking up concurrent waiters.
*/
if (next)
wake_up_process(to_wait(next)->tsk);
}
static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
struct intel_wait *wait)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
struct rb_node **p, *parent, *completed;
bool first, armed;
u32 seqno;
GEM_BUG_ON(!wait->seqno);
/* Insert the request into the retirement ordered list
* of waiters by walking the rbtree. If we are the oldest
* seqno in the tree (the first to be retired), then
* set ourselves as the bottom-half.
*
* As we descend the tree, prune completed branches since we hold the
* spinlock we know that the first_waiter must be delayed and can
* reduce some of the sequential wake up latency if we take action
* ourselves and wake up the completed tasks in parallel. Also, by
* removing stale elements in the tree, we may be able to reduce the
* ping-pong between the old bottom-half and ourselves as first-waiter.
*/
armed = false;
first = true;
parent = NULL;
completed = NULL;
seqno = intel_engine_get_seqno(engine);
/* If the request completed before we managed to grab the spinlock,
* return now before adding ourselves to the rbtree. We let the
* current bottom-half handle any pending wakeups and instead
* try and get out of the way quickly.
*/
if (i915_seqno_passed(seqno, wait->seqno)) {
RB_CLEAR_NODE(&wait->node);
return first;
}
p = &b->waiters.rb_node;
while (*p) {
parent = *p;
if (wait->seqno == to_wait(parent)->seqno) {
/* We have multiple waiters on the same seqno, select
* the highest priority task (that with the smallest
* task->prio) to serve as the bottom-half for this
* group.
*/
if (wait->tsk->prio > to_wait(parent)->tsk->prio) {
p = &parent->rb_right;
first = false;
} else {
p = &parent->rb_left;
}
} else if (i915_seqno_passed(wait->seqno,
to_wait(parent)->seqno)) {
p = &parent->rb_right;
if (i915_seqno_passed(seqno, to_wait(parent)->seqno))
completed = parent;
else
first = false;
} else {
p = &parent->rb_left;
}
}
rb_link_node(&wait->node, parent, p);
rb_insert_color(&wait->node, &b->waiters);
if (first) {
spin_lock(&b->irq_lock);
b->irq_wait = wait;
/* After assigning ourselves as the new bottom-half, we must
* perform a cursory check to prevent a missed interrupt.
* Either we miss the interrupt whilst programming the hardware,
* or if there was a previous waiter (for a later seqno) they
* may be woken instead of us (due to the inherent race
* in the unlocked read of b->irq_seqno_bh in the irq handler)
* and so we miss the wake up.
*/
armed = __intel_breadcrumbs_enable_irq(b);
spin_unlock(&b->irq_lock);
}
if (completed) {
/* Advance the bottom-half (b->irq_wait) before we wake up
* the waiters who may scribble over their intel_wait
* just as the interrupt handler is dereferencing it via
* b->irq_wait.
*/
if (!first) {
struct rb_node *next = rb_next(completed);
GEM_BUG_ON(next == &wait->node);
__intel_breadcrumbs_next(engine, next);
}
do {
struct intel_wait *crumb = to_wait(completed);
completed = rb_prev(completed);
__intel_breadcrumbs_finish(b, crumb);
} while (completed);
}
GEM_BUG_ON(!b->irq_wait);
GEM_BUG_ON(!b->irq_armed);
GEM_BUG_ON(rb_first(&b->waiters) != &b->irq_wait->node);
return armed;
}
bool intel_engine_add_wait(struct intel_engine_cs *engine,
struct intel_wait *wait)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
bool armed;
spin_lock_irq(&b->rb_lock);
armed = __intel_engine_add_wait(engine, wait);
spin_unlock_irq(&b->rb_lock);
if (armed)
return armed;
/* Make the caller recheck if its request has already started. */
return intel_engine_has_started(engine, wait->seqno);
}
static inline bool chain_wakeup(struct rb_node *rb, int priority)
{
return rb && to_wait(rb)->tsk->prio <= priority;
}
static inline int wakeup_priority(struct intel_breadcrumbs *b,
struct task_struct *tsk)
{
if (tsk == b->signaler)
return INT_MIN;
else
return tsk->prio;
}
static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
struct intel_wait *wait)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
lockdep_assert_held(&b->rb_lock);
if (RB_EMPTY_NODE(&wait->node))
goto out;
if (b->irq_wait == wait) {
const int priority = wakeup_priority(b, wait->tsk);
struct rb_node *next;
/* We are the current bottom-half. Find the next candidate,
* the first waiter in the queue on the remaining oldest
* request. As multiple seqnos may complete in the time it
* takes us to wake up and find the next waiter, we have to
* wake up that waiter for it to perform its own coherent
* completion check.
*/
next = rb_next(&wait->node);
if (chain_wakeup(next, priority)) {
/* If the next waiter is already complete,
* wake it up and continue onto the next waiter. So
* if have a small herd, they will wake up in parallel
* rather than sequentially, which should reduce
* the overall latency in waking all the completed
* clients.
*
* However, waking up a chain adds extra latency to
* the first_waiter. This is undesirable if that
* waiter is a high priority task.
*/
u32 seqno = intel_engine_get_seqno(engine);
while (i915_seqno_passed(seqno, to_wait(next)->seqno)) {
struct rb_node *n = rb_next(next);
__intel_breadcrumbs_finish(b, to_wait(next));
next = n;
if (!chain_wakeup(next, priority))
break;
}
}
__intel_breadcrumbs_next(engine, next);
} else {
GEM_BUG_ON(rb_first(&b->waiters) == &wait->node);
}
GEM_BUG_ON(RB_EMPTY_NODE(&wait->node));
rb_erase(&wait->node, &b->waiters);
RB_CLEAR_NODE(&wait->node);
out:
GEM_BUG_ON(b->irq_wait == wait);
GEM_BUG_ON(rb_first(&b->waiters) !=
(b->irq_wait ? &b->irq_wait->node : NULL));
}
void intel_engine_remove_wait(struct intel_engine_cs *engine,
struct intel_wait *wait)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
/* Quick check to see if this waiter was already decoupled from
* the tree by the bottom-half to avoid contention on the spinlock
* by the herd.
*/
if (RB_EMPTY_NODE(&wait->node)) {
GEM_BUG_ON(READ_ONCE(b->irq_wait) == wait);
return;
}
spin_lock_irq(&b->rb_lock);
__intel_engine_remove_wait(engine, wait);
spin_unlock_irq(&b->rb_lock);
}
static void signaler_set_rtpriority(void)
{
struct sched_param param = { .sched_priority = 1 };
sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
}
static int intel_breadcrumbs_signaler(void *arg)
{
struct intel_engine_cs *engine = arg;
struct intel_breadcrumbs *b = &engine->breadcrumbs;
struct i915_request *rq, *n;
/* Install ourselves with high priority to reduce signalling latency */
signaler_set_rtpriority();
do {
bool do_schedule = true;
LIST_HEAD(list);
u32 seqno;
set_current_state(TASK_INTERRUPTIBLE);
if (list_empty(&b->signals))
goto sleep;
/*
* We are either woken up by the interrupt bottom-half,
* or by a client adding a new signaller. In both cases,
* the GPU seqno may have advanced beyond our oldest signal.
* If it has, propagate the signal, remove the waiter and
* check again with the next oldest signal. Otherwise we
* need to wait for a new interrupt from the GPU or for
* a new client.
*/
seqno = intel_engine_get_seqno(engine);
spin_lock_irq(&b->rb_lock);
list_for_each_entry_safe(rq, n, &b->signals, signaling.link) {
u32 this = rq->signaling.wait.seqno;
GEM_BUG_ON(!rq->signaling.wait.seqno);
if (!i915_seqno_passed(seqno, this))
break;
if (likely(this == i915_request_global_seqno(rq))) {
__intel_engine_remove_wait(engine,
&rq->signaling.wait);
rq->signaling.wait.seqno = 0;
__list_del_entry(&rq->signaling.link);
if (!i915_request_signaled(rq)) {
list_add_tail(&rq->signaling.link,
&list);
i915_request_get(rq);
}
}
}
spin_unlock_irq(&b->rb_lock);
if (!list_empty(&list)) {
local_bh_disable();
list_for_each_entry_safe(rq, n, &list, signaling.link) {
dma_fence_signal(&rq->fence);
GEM_BUG_ON(!i915_request_completed(rq));
i915_request_put(rq);
}
local_bh_enable(); /* kick start the tasklets */
/*
* If the engine is saturated we may be continually
* processing completed requests. This angers the
* NMI watchdog if we never let anything else
* have access to the CPU. Let's pretend to be nice
* and relinquish the CPU if we burn through the
* entire RT timeslice!
*/
do_schedule = need_resched();
}
if (unlikely(do_schedule)) {
sleep:
if (kthread_should_park())
kthread_parkme();
if (unlikely(kthread_should_stop()))
break;
schedule();
}
} while (1);
__set_current_state(TASK_RUNNING);
return 0;
}
static void insert_signal(struct intel_breadcrumbs *b,
struct i915_request *request,
const u32 seqno)
{
struct i915_request *iter;
lockdep_assert_held(&b->rb_lock);
/*
* A reasonable assumption is that we are called to add signals
* in sequence, as the requests are submitted for execution and
* assigned a global_seqno. This will be the case for the majority
* of internally generated signals (inter-engine signaling).
*
* Out of order waiters triggering random signaling enabling will
* be more problematic, but hopefully rare enough and the list
* small enough that the O(N) insertion sort is not an issue.
*/
list_for_each_entry_reverse(iter, &b->signals, signaling.link)
if (i915_seqno_passed(seqno, iter->signaling.wait.seqno))
break;
list_add(&request->signaling.link, &iter->signaling.link);
}
bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
{
struct intel_engine_cs *engine = request->engine;
struct intel_breadcrumbs *b = &engine->breadcrumbs;
struct intel_wait *wait = &request->signaling.wait;
u32 seqno;
/*
* Note that we may be called from an interrupt handler on another
* device (e.g. nouveau signaling a fence completion causing us
* to submit a request, and so enable signaling). As such,
* we need to make sure that all other users of b->rb_lock protect
* against interrupts, i.e. use spin_lock_irqsave.
*/
/* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */
GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&request->lock);
seqno = i915_request_global_seqno(request);
if (!seqno) /* will be enabled later upon execution */
return true;
GEM_BUG_ON(wait->seqno);
wait->tsk = b->signaler;
wait->request = request;
wait->seqno = seqno;
/*
* Add ourselves into the list of waiters, but registering our
* bottom-half as the signaller thread. As per usual, only the oldest
* waiter (not just signaller) is tasked as the bottom-half waking
* up all completed waiters after the user interrupt.
*
* If we are the oldest waiter, enable the irq (after which we
* must double check that the seqno did not complete).
*/
spin_lock(&b->rb_lock);
insert_signal(b, request, seqno);
wakeup &= __intel_engine_add_wait(engine, wait);
spin_unlock(&b->rb_lock);
if (wakeup) {
wake_up_process(b->signaler);
return !intel_wait_complete(wait);
}
return true;
}
void intel_engine_cancel_signaling(struct i915_request *request)
{
struct intel_engine_cs *engine = request->engine;
struct intel_breadcrumbs *b = &engine->breadcrumbs;
GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&request->lock);
if (!READ_ONCE(request->signaling.wait.seqno))
return;
spin_lock(&b->rb_lock);
__intel_engine_remove_wait(engine, &request->signaling.wait);
if (fetch_and_zero(&request->signaling.wait.seqno))
__list_del_entry(&request->signaling.link);
spin_unlock(&b->rb_lock);
}
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
struct task_struct *tsk;
spin_lock_init(&b->rb_lock);
spin_lock_init(&b->irq_lock); spin_lock_init(&b->irq_lock);
INIT_LIST_HEAD(&b->signalers);
timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0);
timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0);
INIT_LIST_HEAD(&b->signals);
init_irq_work(&b->irq_work, signal_irq_work);
/* Spawn a thread to provide a common bottom-half for all signals.
* As this is an asynchronous interface we cannot steal the current
* task for handling the bottom-half to the user interrupt, therefore
* we create a thread to do the coherent seqno dance after the
* interrupt and then signal the waitqueue (via the dma-buf/fence).
*/
tsk = kthread_run(intel_breadcrumbs_signaler, engine,
"i915/signal:%d", engine->id);
if (IS_ERR(tsk))
return PTR_ERR(tsk);
b->signaler = tsk;
return 0;
}
static void cancel_fake_irq(struct intel_engine_cs *engine)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
del_timer_sync(&b->fake_irq); /* may queue b->hangcheck */
del_timer_sync(&b->hangcheck);
clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
} }
void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
@ -815,13 +239,6 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
spin_lock_irqsave(&b->irq_lock, flags); spin_lock_irqsave(&b->irq_lock, flags);
/*
* Leave the fake_irq timer enabled (if it is running), but clear the
* bit so that it turns itself off on its next wake up and goes back
* to the long hangcheck interval if still required.
*/
clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
if (b->irq_enabled) if (b->irq_enabled)
irq_enable(engine); irq_enable(engine);
else else
@ -832,19 +249,99 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
{ {
struct intel_breadcrumbs *b = &engine->breadcrumbs;
/* The engines should be idle and all requests accounted for! */
WARN_ON(READ_ONCE(b->irq_wait));
WARN_ON(!RB_EMPTY_ROOT(&b->waiters));
WARN_ON(!list_empty(&b->signals));
if (!IS_ERR_OR_NULL(b->signaler))
kthread_stop(b->signaler);
cancel_fake_irq(engine);
} }
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/intel_breadcrumbs.c"
#endif
bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
return true;
spin_lock(&b->irq_lock);
if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags) &&
!__request_completed(rq)) {
struct intel_context *ce = rq->hw_context;
struct list_head *pos;
__intel_breadcrumbs_arm_irq(b);
/*
* We keep the seqno in retirement order, so we can break
* inside intel_engine_breadcrumbs_irq as soon as we've passed
* the last completed request (or seen a request that hasn't
* even started). We could iterate the timeline->requests list,
* but keeping a separate signalers_list has the advantage of
* hopefully being much smaller than the full list and so
* provides faster iteration and detection when there are no
* more interrupts required for this context.
*
* We typically expect to add new signalers in order, so we
* start looking for our insertion point from the tail of
* the list.
*/
list_for_each_prev(pos, &ce->signals) {
struct i915_request *it =
list_entry(pos, typeof(*it), signal_link);
if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
break;
}
list_add(&rq->signal_link, pos);
if (pos == &ce->signals) /* catch transitions from empty list */
list_move_tail(&ce->signal_link, &b->signalers);
set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
}
spin_unlock(&b->irq_lock);
return !__request_completed(rq);
}
void i915_request_cancel_breadcrumb(struct i915_request *rq)
{
struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
if (!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
return;
spin_lock(&b->irq_lock);
if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
struct intel_context *ce = rq->hw_context;
list_del(&rq->signal_link);
if (list_empty(&ce->signals))
list_del_init(&ce->signal_link);
clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
}
spin_unlock(&b->irq_lock);
}
void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
struct drm_printer *p)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
struct intel_context *ce;
struct i915_request *rq;
if (list_empty(&b->signalers))
return;
drm_printf(p, "Signals:\n");
spin_lock_irq(&b->irq_lock);
list_for_each_entry(ce, &b->signalers, signal_link) {
list_for_each_entry(rq, &ce->signals, signal_link) {
drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
rq->fence.context, rq->fence.seqno,
i915_request_completed(rq) ? "!" :
i915_request_started(rq) ? "*" :
"",
jiffies_to_msecs(jiffies - rq->emitted_jiffies));
}
}
spin_unlock_irq(&b->irq_lock);
}
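The new breadcrumbs code above keeps each context's signalers sorted by fence seqno so the interrupt handler can stop at the first incomplete request, and it searches for the insertion point from the tail because requests normally arrive in order. A standalone sketch of that ordered insert with a wrap-safe seqno comparison, using invented types rather than the i915 structures:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Wrap-safe "a is at or after b", in the spirit of i915_seqno_passed(). */
static int seqno_passed(uint32_t a, uint32_t b)
{
    return (int32_t)(a - b) >= 0;
}

struct req {
    uint32_t seqno;
    struct req *prev, *next;
};

/* Circular list with a dummy head, kept sorted by seqno, oldest first. */
static void insert_sorted(struct req *head, struct req *rq)
{
    struct req *pos;

    /* New signalers usually arrive in order, so search from the tail. */
    for (pos = head->prev; pos != head; pos = pos->prev)
        if (seqno_passed(rq->seqno, pos->seqno))
            break;

    rq->prev = pos;
    rq->next = pos->next;
    pos->next->prev = rq;
    pos->next = rq;
}

int main(void)
{
    struct req head = { 0, &head, &head };
    uint32_t in[] = { 10, 11, 13, 12 };    /* one out-of-order arrival */
    struct req *it;
    size_t i;

    for (i = 0; i < sizeof(in) / sizeof(in[0]); i++) {
        struct req *rq = calloc(1, sizeof(*rq));
        rq->seqno = in[i];
        insert_sorted(&head, rq);
    }

    for (it = head.next; it != &head; it = it->next)
        printf("%u ", it->seqno);          /* prints: 10 11 12 13 */
    printf("\n");
    return 0;
}

Because the list stays sorted, a consumer walking from the head can break as soon as it meets a seqno that has not completed, which is exactly the early exit the irq handler relies on.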

View file

@ -605,48 +605,48 @@ void intel_color_load_luts(struct intel_crtc_state *crtc_state)
dev_priv->display.load_luts(crtc_state); dev_priv->display.load_luts(crtc_state);
} }
static int check_lut_size(const struct drm_property_blob *lut, int expected)
{
int len;
if (!lut)
return 0;
len = drm_color_lut_size(lut);
if (len != expected) {
DRM_DEBUG_KMS("Invalid LUT size; got %d, expected %d\n",
len, expected);
return -EINVAL;
}
return 0;
}
int intel_color_check(struct intel_crtc_state *crtc_state) int intel_color_check(struct intel_crtc_state *crtc_state)
{ {
struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
size_t gamma_length, degamma_length;
uint32_t tests = DRM_COLOR_LUT_NON_DECREASING;
int gamma_length, degamma_length;
u32 gamma_tests, degamma_tests;
degamma_length = INTEL_INFO(dev_priv)->color.degamma_lut_size; degamma_length = INTEL_INFO(dev_priv)->color.degamma_lut_size;
gamma_length = INTEL_INFO(dev_priv)->color.gamma_lut_size; gamma_length = INTEL_INFO(dev_priv)->color.gamma_lut_size;
degamma_tests = INTEL_INFO(dev_priv)->color.degamma_lut_tests;
gamma_tests = INTEL_INFO(dev_priv)->color.gamma_lut_tests;
/* Always allow legacy gamma LUT with no further checking. */
/*
* All of our platforms mandate that the degamma curve be
* non-decreasing. Additionally, GLK and gen11 only accept a single
* value for red, green, and blue in the degamma table. Make sure
* userspace didn't try to pass us something we can't handle.
*
* We don't have any extra hardware constraints on the gamma table,
* so no need to explicitly check it.
*/
if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10)
tests |= DRM_COLOR_LUT_EQUAL_CHANNELS;
if (drm_color_lut_check(crtc_state->base.degamma_lut, tests) != 0)
return -EINVAL;
/*
* We allow both degamma & gamma luts at the right size or
* NULL.
*/
if ((!crtc_state->base.degamma_lut ||
drm_color_lut_size(crtc_state->base.degamma_lut) == degamma_length) &&
(!crtc_state->base.gamma_lut ||
drm_color_lut_size(crtc_state->base.gamma_lut) == gamma_length))
return 0;
/*
* We also allow no degamma lut/ctm and a gamma lut at the legacy
* size (256 entries).
*/
if (crtc_state_is_legacy_gamma(crtc_state)) if (crtc_state_is_legacy_gamma(crtc_state))
return 0; return 0;
if (check_lut_size(crtc_state->base.degamma_lut, degamma_length) ||
check_lut_size(crtc_state->base.gamma_lut, gamma_length))
return -EINVAL; return -EINVAL;
if (drm_color_lut_check(crtc_state->base.degamma_lut, degamma_tests) ||
drm_color_lut_check(crtc_state->base.gamma_lut, gamma_tests))
return -EINVAL;
return 0;
} }
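intel_color_check() now validates LUTs in two stages: a size check against the platform's table length, then per-entry value tests selected per platform. A standalone sketch of the same two-stage idea, using invented types and flags rather than the DRM property-blob API:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for a color LUT entry and its validation flags. */
struct lut_entry { uint16_t red, green, blue; };

#define LUT_NON_DECREASING (1u << 0)
#define LUT_EQUAL_CHANNELS (1u << 1)

static int check_lut_size(int len, int expected)
{
    if (len != expected) {
        fprintf(stderr, "Invalid LUT size; got %d, expected %d\n",
                len, expected);
        return -1;
    }
    return 0;
}

static int check_lut_values(const struct lut_entry *lut, int len, uint32_t tests)
{
    int i;

    for (i = 0; i < len; i++) {
        /* Some platforms accept only a single value per entry. */
        if ((tests & LUT_EQUAL_CHANNELS) &&
            (lut[i].red != lut[i].green || lut[i].green != lut[i].blue))
            return -1;
        /* The curve must never step downwards when required. */
        if (i > 0 && (tests & LUT_NON_DECREASING) &&
            (lut[i].red < lut[i - 1].red ||
             lut[i].green < lut[i - 1].green ||
             lut[i].blue < lut[i - 1].blue))
            return -1;
    }
    return 0;
}

int main(void)
{
    struct lut_entry lut[4] = {
        { 0, 0, 0 }, { 100, 100, 100 }, { 200, 200, 200 }, { 300, 300, 300 },
    };
    uint32_t tests = LUT_NON_DECREASING | LUT_EQUAL_CHANNELS;

    if (check_lut_size(4, 4) || check_lut_values(lut, 4, tests))
        return 1;
    printf("LUT ok\n");
    return 0;
}

Splitting the size check from the value tests is what lets each platform pick its own test mask while sharing one validation path.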
void intel_color_init(struct intel_crtc *crtc) void intel_color_init(struct intel_crtc *crtc)

View file

@ -995,7 +995,7 @@ static u32 hsw_pll_to_ddi_pll_sel(const struct intel_shared_dpll *pll)
} }
} }
static u32 icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state)
static u32 icl_pll_to_ddi_clk_sel(struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state)
{ {
const struct intel_shared_dpll *pll = crtc_state->shared_dpll; const struct intel_shared_dpll *pll = crtc_state->shared_dpll;
@ -1004,10 +1004,11 @@ static u32 icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder,
switch (id) { switch (id) {
default: default:
/*
* DPLL_ID_ICL_DPLL0 and DPLL_ID_ICL_DPLL1 should not be used
* here, so do warn if this gets passed in
*/
MISSING_CASE(id); MISSING_CASE(id);
/* fall through */
case DPLL_ID_ICL_DPLL0:
case DPLL_ID_ICL_DPLL1:
return DDI_CLK_SEL_NONE; return DDI_CLK_SEL_NONE;
case DPLL_ID_ICL_TBTPLL: case DPLL_ID_ICL_TBTPLL:
switch (clock) { switch (clock) {
@ -1021,7 +1022,7 @@ static u32 icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder,
return DDI_CLK_SEL_TBT_810; return DDI_CLK_SEL_TBT_810;
default: default:
MISSING_CASE(clock); MISSING_CASE(clock);
break;
return DDI_CLK_SEL_NONE;
} }
case DPLL_ID_ICL_MGPLL1: case DPLL_ID_ICL_MGPLL1:
case DPLL_ID_ICL_MGPLL2: case DPLL_ID_ICL_MGPLL2:
@ -1391,16 +1392,17 @@ static int icl_calc_tbt_pll_link(struct drm_i915_private *dev_priv,
static int icl_calc_mg_pll_link(struct drm_i915_private *dev_priv, static int icl_calc_mg_pll_link(struct drm_i915_private *dev_priv,
enum port port) enum port port)
{ {
enum tc_port tc_port = intel_port_to_tc(dev_priv, port);
u32 mg_pll_div0, mg_clktop_hsclkctl; u32 mg_pll_div0, mg_clktop_hsclkctl;
u32 m1, m2_int, m2_frac, div1, div2, refclk; u32 m1, m2_int, m2_frac, div1, div2, refclk;
u64 tmp; u64 tmp;
refclk = dev_priv->cdclk.hw.ref; refclk = dev_priv->cdclk.hw.ref;
mg_pll_div0 = I915_READ(MG_PLL_DIV0(port));
mg_clktop_hsclkctl = I915_READ(MG_CLKTOP2_HSCLKCTL(port));
m1 = I915_READ(MG_PLL_DIV1(port)) & MG_PLL_DIV1_FBPREDIV_MASK;
mg_pll_div0 = I915_READ(MG_PLL_DIV0(tc_port));
mg_clktop_hsclkctl = I915_READ(MG_CLKTOP2_HSCLKCTL(tc_port));
m1 = I915_READ(MG_PLL_DIV1(tc_port)) & MG_PLL_DIV1_FBPREDIV_MASK;
m2_int = mg_pll_div0 & MG_PLL_DIV0_FBDIV_INT_MASK; m2_int = mg_pll_div0 & MG_PLL_DIV0_FBDIV_INT_MASK;
m2_frac = (mg_pll_div0 & MG_PLL_DIV0_FRACNEN_H) ? m2_frac = (mg_pll_div0 & MG_PLL_DIV0_FRACNEN_H) ?
(mg_pll_div0 & MG_PLL_DIV0_FBDIV_FRAC_MASK) >> (mg_pll_div0 & MG_PLL_DIV0_FBDIV_FRAC_MASK) >>
@ -2868,7 +2870,7 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder,
if (IS_ICELAKE(dev_priv)) { if (IS_ICELAKE(dev_priv)) {
if (!intel_port_is_combophy(dev_priv, port)) if (!intel_port_is_combophy(dev_priv, port))
I915_WRITE(DDI_CLK_SEL(port), I915_WRITE(DDI_CLK_SEL(port),
icl_pll_to_ddi_pll_sel(encoder, crtc_state));
icl_pll_to_ddi_clk_sel(encoder, crtc_state));
} else if (IS_CANNONLAKE(dev_priv)) { } else if (IS_CANNONLAKE(dev_priv)) {
/* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */ /* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */
val = I915_READ(DPCLKA_CFGCR0); val = I915_READ(DPCLKA_CFGCR0);

View file

@ -189,6 +189,8 @@ struct intel_device_info {
struct color_luts { struct color_luts {
u16 degamma_lut_size; u16 degamma_lut_size;
u16 gamma_lut_size; u16 gamma_lut_size;
u32 degamma_lut_tests;
u32 gamma_lut_tests;
} color; } color;
}; };

View file

@ -1758,6 +1758,35 @@ enum pipe intel_crtc_pch_transcoder(struct intel_crtc *crtc)
return crtc->pipe; return crtc->pipe;
} }
static u32 intel_crtc_max_vblank_count(const struct intel_crtc_state *crtc_state)
{
struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
/*
* On i965gm the hardware frame counter reads
* zero when the TV encoder is enabled :(
*/
if (IS_I965GM(dev_priv) &&
(crtc_state->output_types & BIT(INTEL_OUTPUT_TVOUT)))
return 0;
if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv))
return 0xffffffff; /* full 32 bit counter */
else if (INTEL_GEN(dev_priv) >= 3)
return 0xffffff; /* only 24 bits of frame count */
else
return 0; /* Gen2 doesn't have a hardware frame counter */
}
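intel_crtc_max_vblank_count() reports how wide the hardware frame counter is; consumers then have to treat counter differences modulo that width. A standalone sketch of the wrap-safe delta for an assumed 24-bit counter, with helper names invented for the example:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical counter widths mirroring the cases above. */
#define MAX_VBLANK_FULL 0xffffffffu   /* full 32-bit counter */
#define MAX_VBLANK_GEN3 0x00ffffffu   /* only 24 bits of frame count */

/* Wrap-safe "frames elapsed from a to b" for a counter that rolls over
 * at max + 1; max must be of the form 2^n - 1 for the mask to work. */
static uint32_t vblank_delta(uint32_t a, uint32_t b, uint32_t max)
{
    return (b - a) & max;
}

int main(void)
{
    /* Counter read just before and just after a 24-bit rollover. */
    uint32_t before = 0x00fffffe;
    uint32_t after  = 0x00000003;

    printf("delta = %u\n", vblank_delta(before, after, MAX_VBLANK_GEN3));
    /* prints 5: the rollover is hidden by masking with the counter width */
    return 0;
}

Returning 0 from the real helper (no usable counter) is what pushes the vblank core onto timestamp-based counting instead.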
static void intel_crtc_vblank_on(const struct intel_crtc_state *crtc_state)
{
struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
drm_crtc_set_max_vblank_count(&crtc->base,
intel_crtc_max_vblank_count(crtc_state));
drm_crtc_vblank_on(&crtc->base);
}
static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state) static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state)
{ {
struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
@ -1810,7 +1839,7 @@ static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state)
* when it's derived from the timestamps. So let's wait for the * when it's derived from the timestamps. So let's wait for the
* pipe to start properly before we call drm_crtc_vblank_on() * pipe to start properly before we call drm_crtc_vblank_on()
*/ */
if (dev_priv->drm.max_vblank_count == 0) if (intel_crtc_max_vblank_count(new_crtc_state) == 0)
intel_wait_for_pipe_scanline_moving(crtc); intel_wait_for_pipe_scanline_moving(crtc);
} }
@ -3901,6 +3930,16 @@ static void intel_update_pipe_config(const struct intel_crtc_state *old_crtc_sta
else if (old_crtc_state->pch_pfit.enabled) else if (old_crtc_state->pch_pfit.enabled)
ironlake_pfit_disable(old_crtc_state); ironlake_pfit_disable(old_crtc_state);
} }
/*
* We don't (yet) allow userspace to control the pipe background color,
* so force it to black, but apply pipe gamma and CSC so that its
* handling will match how we program our planes.
*/
if (INTEL_GEN(dev_priv) >= 9)
I915_WRITE(SKL_BOTTOM_COLOR(crtc->pipe),
SKL_BOTTOM_COLOR_GAMMA_ENABLE |
SKL_BOTTOM_COLOR_CSC_ENABLE);
} }
static void intel_fdi_normal_train(struct intel_crtc *crtc) static void intel_fdi_normal_train(struct intel_crtc *crtc)
@ -5678,7 +5717,7 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config,
ironlake_pch_enable(old_intel_state, pipe_config); ironlake_pch_enable(old_intel_state, pipe_config);
assert_vblank_disabled(crtc); assert_vblank_disabled(crtc);
drm_crtc_vblank_on(crtc); intel_crtc_vblank_on(pipe_config);
intel_encoders_enable(crtc, pipe_config, old_state); intel_encoders_enable(crtc, pipe_config, old_state);
@ -5832,7 +5871,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config,
intel_ddi_set_vc_payload_alloc(pipe_config, true); intel_ddi_set_vc_payload_alloc(pipe_config, true);
assert_vblank_disabled(crtc); assert_vblank_disabled(crtc);
drm_crtc_vblank_on(crtc); intel_crtc_vblank_on(pipe_config);
intel_encoders_enable(crtc, pipe_config, old_state); intel_encoders_enable(crtc, pipe_config, old_state);
@ -6171,7 +6210,7 @@ static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config,
intel_enable_pipe(pipe_config); intel_enable_pipe(pipe_config);
assert_vblank_disabled(crtc); assert_vblank_disabled(crtc);
drm_crtc_vblank_on(crtc); intel_crtc_vblank_on(pipe_config);
intel_encoders_enable(crtc, pipe_config, old_state); intel_encoders_enable(crtc, pipe_config, old_state);
} }
@ -6230,7 +6269,7 @@ static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config,
intel_enable_pipe(pipe_config); intel_enable_pipe(pipe_config);
assert_vblank_disabled(crtc); assert_vblank_disabled(crtc);
drm_crtc_vblank_on(crtc); intel_crtc_vblank_on(pipe_config);
intel_encoders_enable(crtc, pipe_config, old_state); intel_encoders_enable(crtc, pipe_config, old_state);
} }
@ -9416,7 +9455,7 @@ static void icelake_get_ddi_pll(struct drm_i915_private *dev_priv,
if (WARN_ON(!intel_dpll_is_combophy(id))) if (WARN_ON(!intel_dpll_is_combophy(id)))
return; return;
} else if (intel_port_is_tc(dev_priv, port)) { } else if (intel_port_is_tc(dev_priv, port)) {
id = icl_port_to_mg_pll_id(port); id = icl_tc_port_to_pll_id(intel_port_to_tc(dev_priv, port));
} else { } else {
WARN(1, "Invalid port %x\n", port); WARN(1, "Invalid port %x\n", port);
return; return;
@ -11690,6 +11729,23 @@ pipe_config_err(bool adjust, const char *name, const char *format, ...)
va_end(args); va_end(args);
} }
static bool fastboot_enabled(struct drm_i915_private *dev_priv)
{
if (i915_modparams.fastboot != -1)
return i915_modparams.fastboot;
/* Enable fastboot by default on Skylake and newer */
if (INTEL_GEN(dev_priv) >= 9)
return true;
/* Enable fastboot by default on VLV and CHV */
if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
return true;
/* Disabled by default on all others */
return false;
}
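fastboot_enabled() treats the module parameter as a tri-state: -1 picks a per-platform default while 0 or 1 force the behaviour. A standalone sketch of that override pattern, with platform names made up for the illustration:

#include <stdbool.h>
#include <stdio.h>

/* -1 = auto (per-platform default), 0 = force off, 1 = force on. */
static int fastboot_param = -1;

enum platform { PLATFORM_OLD, PLATFORM_VLV, PLATFORM_SKL_PLUS };

static bool fastboot_enabled(enum platform p)
{
    if (fastboot_param != -1)
        return fastboot_param;      /* explicit user override wins */

    switch (p) {
    case PLATFORM_SKL_PLUS:         /* default on for newer platforms */
    case PLATFORM_VLV:
        return true;
    default:
        return false;               /* conservative default elsewhere */
    }
}

int main(void)
{
    printf("old=%d vlv=%d skl=%d\n",
           fastboot_enabled(PLATFORM_OLD),
           fastboot_enabled(PLATFORM_VLV),
           fastboot_enabled(PLATFORM_SKL_PLUS));

    fastboot_param = 0;             /* user forces fastboot off everywhere */
    printf("forced off: skl=%d\n", fastboot_enabled(PLATFORM_SKL_PLUS));
    return 0;
}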
static bool static bool
intel_pipe_config_compare(struct drm_i915_private *dev_priv, intel_pipe_config_compare(struct drm_i915_private *dev_priv,
struct intel_crtc_state *current_config, struct intel_crtc_state *current_config,
@ -11701,7 +11757,7 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv,
(current_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED) && (current_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED) &&
!(pipe_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED); !(pipe_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED);
if (fixup_inherited && !i915_modparams.fastboot) {
if (fixup_inherited && !fastboot_enabled(dev_priv)) {
DRM_DEBUG_KMS("initial modeset and fastboot not set\n"); DRM_DEBUG_KMS("initial modeset and fastboot not set\n");
ret = false; ret = false;
} }
@ -12778,8 +12834,9 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,
u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc) u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc)
{ {
struct drm_device *dev = crtc->base.dev; struct drm_device *dev = crtc->base.dev;
struct drm_vblank_crtc *vblank = &dev->vblank[drm_crtc_index(&crtc->base)];
if (!dev->max_vblank_count)
if (!vblank->max_vblank_count)
return (u32)drm_crtc_accurate_vblank_count(&crtc->base); return (u32)drm_crtc_accurate_vblank_count(&crtc->base);
return dev->driver->get_vblank_counter(dev, crtc->pipe); return dev->driver->get_vblank_counter(dev, crtc->pipe);
@ -14327,8 +14384,10 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv)
/* /*
* On some ICL SKUs port F is not present. No strap bits for * On some ICL SKUs port F is not present. No strap bits for
* this, so rely on VBT. * this, so rely on VBT.
* Work around broken VBTs on SKUs known to have no port F.
*/ */
if (intel_bios_is_port_present(dev_priv, PORT_F))
if (IS_ICL_WITH_PORT_F(dev_priv) &&
intel_bios_is_port_present(dev_priv, PORT_F))
intel_ddi_init(dev_priv, PORT_F); intel_ddi_init(dev_priv, PORT_F);
icl_dsi_init(dev_priv); icl_dsi_init(dev_priv);
@ -14680,14 +14739,6 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
drm_helper_mode_fill_fb_struct(&dev_priv->drm, fb, mode_cmd); drm_helper_mode_fill_fb_struct(&dev_priv->drm, fb, mode_cmd);
if (fb->format->format == DRM_FORMAT_NV12 &&
(fb->width < SKL_MIN_YUV_420_SRC_W ||
fb->height < SKL_MIN_YUV_420_SRC_H ||
(fb->width % 4) != 0 || (fb->height % 4) != 0)) {
DRM_DEBUG_KMS("src dimensions not correct for NV12\n");
goto err;
}
for (i = 0; i < fb->format->num_planes; i++) { for (i = 0; i < fb->format->num_planes; i++) {
u32 stride_alignment; u32 stride_alignment;
@ -15457,6 +15508,15 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc,
plane->base.type != DRM_PLANE_TYPE_PRIMARY) plane->base.type != DRM_PLANE_TYPE_PRIMARY)
intel_plane_disable_noatomic(crtc, plane); intel_plane_disable_noatomic(crtc, plane);
} }
/*
* Disable any background color set by the BIOS, but enable the
* gamma and CSC to match how we program our planes.
*/
if (INTEL_GEN(dev_priv) >= 9)
I915_WRITE(SKL_BOTTOM_COLOR(crtc->pipe),
SKL_BOTTOM_COLOR_GAMMA_ENABLE |
SKL_BOTTOM_COLOR_CSC_ENABLE);
} }
/* Adjust the state of the output pipe according to whether we /* Adjust the state of the output pipe according to whether we
@ -15493,16 +15553,45 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc,
} }
} }
static bool has_bogus_dpll_config(const struct intel_crtc_state *crtc_state)
{
struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
/*
* Some SNB BIOSen (eg. ASUS K53SV) are known to misprogram
* the hardware when a high res display is plugged in. DPLL P
* divider is zero, and the pipe timings are bonkers. We'll
* try to disable everything in that case.
*
* FIXME would be nice to be able to sanitize this state
* without several WARNs, but for now let's take the easy
* road.
*/
return IS_GEN(dev_priv, 6) &&
crtc_state->base.active &&
crtc_state->shared_dpll &&
crtc_state->port_clock == 0;
}
static void intel_sanitize_encoder(struct intel_encoder *encoder) static void intel_sanitize_encoder(struct intel_encoder *encoder)
{ {
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
struct intel_connector *connector; struct intel_connector *connector;
struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
struct intel_crtc_state *crtc_state = crtc ?
to_intel_crtc_state(crtc->base.state) : NULL;
/* We need to check both for a crtc link (meaning that the /* We need to check both for a crtc link (meaning that the
* encoder is active and trying to read from a pipe) and the * encoder is active and trying to read from a pipe) and the
* pipe itself being active. */ * pipe itself being active. */
bool has_active_crtc = encoder->base.crtc &&
to_intel_crtc(encoder->base.crtc)->active;
bool has_active_crtc = crtc_state &&
crtc_state->base.active;
if (crtc_state && has_bogus_dpll_config(crtc_state)) {
DRM_DEBUG_KMS("BIOS has misprogrammed the hardware. Disabling pipe %c\n",
pipe_name(crtc->pipe));
has_active_crtc = false;
}
connector = intel_encoder_find_connector(encoder); connector = intel_encoder_find_connector(encoder);
if (connector && !has_active_crtc) { if (connector && !has_active_crtc) {
@ -15513,16 +15602,25 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder)
/* Connector is active, but has no active pipe. This is /* Connector is active, but has no active pipe. This is
* fallout from our resume register restoring. Disable * fallout from our resume register restoring. Disable
* the encoder manually again. */ * the encoder manually again. */
if (encoder->base.crtc) { if (crtc_state) {
struct drm_crtc_state *crtc_state = encoder->base.crtc->state; struct drm_encoder *best_encoder;
DRM_DEBUG_KMS("[ENCODER:%d:%s] manually disabled\n", DRM_DEBUG_KMS("[ENCODER:%d:%s] manually disabled\n",
encoder->base.base.id, encoder->base.base.id,
encoder->base.name); encoder->base.name);
/* avoid oopsing in case the hooks consult best_encoder */
best_encoder = connector->base.state->best_encoder;
connector->base.state->best_encoder = &encoder->base;
if (encoder->disable) if (encoder->disable)
encoder->disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state); encoder->disable(encoder, crtc_state,
connector->base.state);
if (encoder->post_disable) if (encoder->post_disable)
encoder->post_disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state); encoder->post_disable(encoder, crtc_state,
connector->base.state);
connector->base.state->best_encoder = best_encoder;
} }
encoder->base.crtc = NULL; encoder->base.crtc = NULL;
@ -15894,10 +15992,12 @@ intel_modeset_setup_hw_state(struct drm_device *dev,
* waits, so we need vblank interrupts restored beforehand. * waits, so we need vblank interrupts restored beforehand.
*/ */
for_each_intel_crtc(&dev_priv->drm, crtc) { for_each_intel_crtc(&dev_priv->drm, crtc) {
crtc_state = to_intel_crtc_state(crtc->base.state);
drm_crtc_vblank_reset(&crtc->base); drm_crtc_vblank_reset(&crtc->base);
if (crtc->base.state->active) if (crtc_state->base.active)
drm_crtc_vblank_on(&crtc->base); intel_crtc_vblank_on(crtc_state);
} }
intel_sanitize_plane_mapping(dev_priv); intel_sanitize_plane_mapping(dev_priv);


@ -247,7 +247,7 @@ intel_find_shared_dpll(struct intel_crtc *crtc,
enum intel_dpll_id range_max) enum intel_dpll_id range_max)
{ {
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
struct intel_shared_dpll *pll; struct intel_shared_dpll *pll, *unused_pll = NULL;
struct intel_shared_dpll_state *shared_dpll; struct intel_shared_dpll_state *shared_dpll;
enum intel_dpll_id i; enum intel_dpll_id i;
@ -257,8 +257,11 @@ intel_find_shared_dpll(struct intel_crtc *crtc,
pll = &dev_priv->shared_dplls[i]; pll = &dev_priv->shared_dplls[i];
/* Only want to check enabled timings first */ /* Only want to check enabled timings first */
if (shared_dpll[i].crtc_mask == 0) if (shared_dpll[i].crtc_mask == 0) {
if (!unused_pll)
unused_pll = pll;
continue; continue;
}
if (memcmp(&crtc_state->dpll_hw_state, if (memcmp(&crtc_state->dpll_hw_state,
&shared_dpll[i].hw_state, &shared_dpll[i].hw_state,
@ -273,14 +276,11 @@ intel_find_shared_dpll(struct intel_crtc *crtc,
} }
/* Ok no matching timings, maybe there's a free one? */ /* Ok no matching timings, maybe there's a free one? */
for (i = range_min; i <= range_max; i++) { if (unused_pll) {
pll = &dev_priv->shared_dplls[i];
if (shared_dpll[i].crtc_mask == 0) {
DRM_DEBUG_KMS("[CRTC:%d:%s] allocated %s\n", DRM_DEBUG_KMS("[CRTC:%d:%s] allocated %s\n",
crtc->base.base.id, crtc->base.name, crtc->base.base.id, crtc->base.name,
pll->info->name); unused_pll->info->name);
return pll; return unused_pll;
}
} }
return NULL; return NULL;
@ -2639,14 +2639,14 @@ int icl_calc_dp_combo_pll_link(struct drm_i915_private *dev_priv,
return link_clock; return link_clock;
} }
static enum port icl_mg_pll_id_to_port(enum intel_dpll_id id) static enum tc_port icl_pll_id_to_tc_port(enum intel_dpll_id id)
{ {
return id - DPLL_ID_ICL_MGPLL1 + PORT_C; return id - DPLL_ID_ICL_MGPLL1;
} }
enum intel_dpll_id icl_port_to_mg_pll_id(enum port port) enum intel_dpll_id icl_tc_port_to_pll_id(enum tc_port tc_port)
{ {
return port - PORT_C + DPLL_ID_ICL_MGPLL1; return tc_port + DPLL_ID_ICL_MGPLL1;
} }
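The helpers above replace the old DDI-port mapping (PORT_C upwards) with a Type-C port index, so the conversion is a plain offset from DPLL_ID_ICL_MGPLL1 and the result can go straight into the MG_PLL register macros, as icl_pll_id_to_enable_reg() does below. A minimal standalone sketch of the round-trip; the enum values here are assumptions chosen only to mirror the offset arithmetic, the real ones live in the i915 headers:

#include <assert.h>

/* Assumed, illustration-only values; the real enums are defined in i915. */
enum tc_port { TC_PORT_1 = 0, TC_PORT_2, TC_PORT_3, TC_PORT_4 };
enum intel_dpll_id {
	DPLL_ID_ICL_MGPLL1 = 3,	/* first MG PLL; one per Type-C port */
	DPLL_ID_ICL_MGPLL2,
	DPLL_ID_ICL_MGPLL3,
	DPLL_ID_ICL_MGPLL4,
};

static enum tc_port icl_pll_id_to_tc_port(enum intel_dpll_id id)
{
	return id - DPLL_ID_ICL_MGPLL1;
}

static enum intel_dpll_id icl_tc_port_to_pll_id(enum tc_port tc_port)
{
	return tc_port + DPLL_ID_ICL_MGPLL1;
}

int main(void)
{
	/* The mapping is a pure offset, so the round-trip is the identity. */
	for (int tc = TC_PORT_1; tc <= TC_PORT_4; tc++)
		assert(icl_pll_id_to_tc_port(icl_tc_port_to_pll_id(tc)) == tc);
	return 0;
}
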
bool intel_dpll_is_combophy(enum intel_dpll_id id) bool intel_dpll_is_combophy(enum intel_dpll_id id)
@ -2925,7 +2925,10 @@ icl_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state,
ret = icl_calc_dpll_state(crtc_state, encoder, clock, ret = icl_calc_dpll_state(crtc_state, encoder, clock,
&pll_state); &pll_state);
} else { } else {
min = icl_port_to_mg_pll_id(port); enum tc_port tc_port;
tc_port = intel_port_to_tc(dev_priv, port);
min = icl_tc_port_to_pll_id(tc_port);
max = min; max = min;
ret = icl_calc_mg_pll_state(crtc_state, encoder, clock, ret = icl_calc_mg_pll_state(crtc_state, encoder, clock,
&pll_state); &pll_state);
@ -2959,12 +2962,8 @@ static i915_reg_t icl_pll_id_to_enable_reg(enum intel_dpll_id id)
return CNL_DPLL_ENABLE(id); return CNL_DPLL_ENABLE(id);
else if (id == DPLL_ID_ICL_TBTPLL) else if (id == DPLL_ID_ICL_TBTPLL)
return TBT_PLL_ENABLE; return TBT_PLL_ENABLE;
else
/* return MG_PLL_ENABLE(icl_pll_id_to_tc_port(id));
* TODO: Make MG_PLL macros use
* tc port id instead of port id
*/
return MG_PLL_ENABLE(icl_mg_pll_id_to_port(id));
} }
static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv,
@ -2974,7 +2973,6 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv,
const enum intel_dpll_id id = pll->info->id; const enum intel_dpll_id id = pll->info->id;
intel_wakeref_t wakeref; intel_wakeref_t wakeref;
bool ret = false; bool ret = false;
enum port port;
u32 val; u32 val;
wakeref = intel_display_power_get_if_enabled(dev_priv, wakeref = intel_display_power_get_if_enabled(dev_priv,
@ -2991,32 +2989,33 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv,
hw_state->cfgcr0 = I915_READ(ICL_DPLL_CFGCR0(id)); hw_state->cfgcr0 = I915_READ(ICL_DPLL_CFGCR0(id));
hw_state->cfgcr1 = I915_READ(ICL_DPLL_CFGCR1(id)); hw_state->cfgcr1 = I915_READ(ICL_DPLL_CFGCR1(id));
} else { } else {
port = icl_mg_pll_id_to_port(id); enum tc_port tc_port = icl_pll_id_to_tc_port(id);
hw_state->mg_refclkin_ctl = I915_READ(MG_REFCLKIN_CTL(port));
hw_state->mg_refclkin_ctl = I915_READ(MG_REFCLKIN_CTL(tc_port));
hw_state->mg_refclkin_ctl &= MG_REFCLKIN_CTL_OD_2_MUX_MASK; hw_state->mg_refclkin_ctl &= MG_REFCLKIN_CTL_OD_2_MUX_MASK;
hw_state->mg_clktop2_coreclkctl1 = hw_state->mg_clktop2_coreclkctl1 =
I915_READ(MG_CLKTOP2_CORECLKCTL1(port)); I915_READ(MG_CLKTOP2_CORECLKCTL1(tc_port));
hw_state->mg_clktop2_coreclkctl1 &= hw_state->mg_clktop2_coreclkctl1 &=
MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK; MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK;
hw_state->mg_clktop2_hsclkctl = hw_state->mg_clktop2_hsclkctl =
I915_READ(MG_CLKTOP2_HSCLKCTL(port)); I915_READ(MG_CLKTOP2_HSCLKCTL(tc_port));
hw_state->mg_clktop2_hsclkctl &= hw_state->mg_clktop2_hsclkctl &=
MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK | MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK |
MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK | MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK |
MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK | MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK |
MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK; MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK;
hw_state->mg_pll_div0 = I915_READ(MG_PLL_DIV0(port)); hw_state->mg_pll_div0 = I915_READ(MG_PLL_DIV0(tc_port));
hw_state->mg_pll_div1 = I915_READ(MG_PLL_DIV1(port)); hw_state->mg_pll_div1 = I915_READ(MG_PLL_DIV1(tc_port));
hw_state->mg_pll_lf = I915_READ(MG_PLL_LF(port)); hw_state->mg_pll_lf = I915_READ(MG_PLL_LF(tc_port));
hw_state->mg_pll_frac_lock = I915_READ(MG_PLL_FRAC_LOCK(port)); hw_state->mg_pll_frac_lock = I915_READ(MG_PLL_FRAC_LOCK(tc_port));
hw_state->mg_pll_ssc = I915_READ(MG_PLL_SSC(port)); hw_state->mg_pll_ssc = I915_READ(MG_PLL_SSC(tc_port));
hw_state->mg_pll_bias = I915_READ(MG_PLL_BIAS(port)); hw_state->mg_pll_bias = I915_READ(MG_PLL_BIAS(tc_port));
hw_state->mg_pll_tdc_coldst_bias = hw_state->mg_pll_tdc_coldst_bias =
I915_READ(MG_PLL_TDC_COLDST_BIAS(port)); I915_READ(MG_PLL_TDC_COLDST_BIAS(tc_port));
if (dev_priv->cdclk.hw.ref == 38400) { if (dev_priv->cdclk.hw.ref == 38400) {
hw_state->mg_pll_tdc_coldst_bias_mask = MG_PLL_TDC_COLDST_COLDSTART; hw_state->mg_pll_tdc_coldst_bias_mask = MG_PLL_TDC_COLDST_COLDSTART;
@ -3051,7 +3050,7 @@ static void icl_mg_pll_write(struct drm_i915_private *dev_priv,
struct intel_shared_dpll *pll) struct intel_shared_dpll *pll)
{ {
struct intel_dpll_hw_state *hw_state = &pll->state.hw_state; struct intel_dpll_hw_state *hw_state = &pll->state.hw_state;
enum port port = icl_mg_pll_id_to_port(pll->info->id); enum tc_port tc_port = icl_pll_id_to_tc_port(pll->info->id);
u32 val; u32 val;
/* /*
@ -3060,41 +3059,41 @@ static void icl_mg_pll_write(struct drm_i915_private *dev_priv,
* during the calc/readout phase if the mask depends on some other HW * during the calc/readout phase if the mask depends on some other HW
* state like refclk, see icl_calc_mg_pll_state(). * state like refclk, see icl_calc_mg_pll_state().
*/ */
val = I915_READ(MG_REFCLKIN_CTL(port)); val = I915_READ(MG_REFCLKIN_CTL(tc_port));
val &= ~MG_REFCLKIN_CTL_OD_2_MUX_MASK; val &= ~MG_REFCLKIN_CTL_OD_2_MUX_MASK;
val |= hw_state->mg_refclkin_ctl; val |= hw_state->mg_refclkin_ctl;
I915_WRITE(MG_REFCLKIN_CTL(port), val); I915_WRITE(MG_REFCLKIN_CTL(tc_port), val);
val = I915_READ(MG_CLKTOP2_CORECLKCTL1(port)); val = I915_READ(MG_CLKTOP2_CORECLKCTL1(tc_port));
val &= ~MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK; val &= ~MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK;
val |= hw_state->mg_clktop2_coreclkctl1; val |= hw_state->mg_clktop2_coreclkctl1;
I915_WRITE(MG_CLKTOP2_CORECLKCTL1(port), val); I915_WRITE(MG_CLKTOP2_CORECLKCTL1(tc_port), val);
val = I915_READ(MG_CLKTOP2_HSCLKCTL(port)); val = I915_READ(MG_CLKTOP2_HSCLKCTL(tc_port));
val &= ~(MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK | val &= ~(MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK |
MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK | MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK |
MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK | MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK |
MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK); MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK);
val |= hw_state->mg_clktop2_hsclkctl; val |= hw_state->mg_clktop2_hsclkctl;
I915_WRITE(MG_CLKTOP2_HSCLKCTL(port), val); I915_WRITE(MG_CLKTOP2_HSCLKCTL(tc_port), val);
I915_WRITE(MG_PLL_DIV0(port), hw_state->mg_pll_div0); I915_WRITE(MG_PLL_DIV0(tc_port), hw_state->mg_pll_div0);
I915_WRITE(MG_PLL_DIV1(port), hw_state->mg_pll_div1); I915_WRITE(MG_PLL_DIV1(tc_port), hw_state->mg_pll_div1);
I915_WRITE(MG_PLL_LF(port), hw_state->mg_pll_lf); I915_WRITE(MG_PLL_LF(tc_port), hw_state->mg_pll_lf);
I915_WRITE(MG_PLL_FRAC_LOCK(port), hw_state->mg_pll_frac_lock); I915_WRITE(MG_PLL_FRAC_LOCK(tc_port), hw_state->mg_pll_frac_lock);
I915_WRITE(MG_PLL_SSC(port), hw_state->mg_pll_ssc); I915_WRITE(MG_PLL_SSC(tc_port), hw_state->mg_pll_ssc);
val = I915_READ(MG_PLL_BIAS(port)); val = I915_READ(MG_PLL_BIAS(tc_port));
val &= ~hw_state->mg_pll_bias_mask; val &= ~hw_state->mg_pll_bias_mask;
val |= hw_state->mg_pll_bias; val |= hw_state->mg_pll_bias;
I915_WRITE(MG_PLL_BIAS(port), val); I915_WRITE(MG_PLL_BIAS(tc_port), val);
val = I915_READ(MG_PLL_TDC_COLDST_BIAS(port)); val = I915_READ(MG_PLL_TDC_COLDST_BIAS(tc_port));
val &= ~hw_state->mg_pll_tdc_coldst_bias_mask; val &= ~hw_state->mg_pll_tdc_coldst_bias_mask;
val |= hw_state->mg_pll_tdc_coldst_bias; val |= hw_state->mg_pll_tdc_coldst_bias;
I915_WRITE(MG_PLL_TDC_COLDST_BIAS(port), val); I915_WRITE(MG_PLL_TDC_COLDST_BIAS(tc_port), val);
POSTING_READ(MG_PLL_TDC_COLDST_BIAS(port)); POSTING_READ(MG_PLL_TDC_COLDST_BIAS(tc_port));
} }
static void icl_pll_enable(struct drm_i915_private *dev_priv, static void icl_pll_enable(struct drm_i915_private *dev_priv,


@ -344,7 +344,7 @@ void intel_dpll_dump_hw_state(struct drm_i915_private *dev_priv,
int icl_calc_dp_combo_pll_link(struct drm_i915_private *dev_priv, int icl_calc_dp_combo_pll_link(struct drm_i915_private *dev_priv,
u32 pll_id); u32 pll_id);
int cnl_hdmi_pll_ref_clock(struct drm_i915_private *dev_priv); int cnl_hdmi_pll_ref_clock(struct drm_i915_private *dev_priv);
enum intel_dpll_id icl_port_to_mg_pll_id(enum port port); enum intel_dpll_id icl_tc_port_to_pll_id(enum tc_port tc_port);
bool intel_dpll_is_combophy(enum intel_dpll_id id); bool intel_dpll_is_combophy(enum intel_dpll_id id);
#endif /* _INTEL_DPLL_MGR_H_ */ #endif /* _INTEL_DPLL_MGR_H_ */


@ -630,9 +630,11 @@ struct intel_crtc_scaler_state {
}; };
/* drm_mode->private_flags */ /* drm_mode->private_flags */
#define I915_MODE_FLAG_INHERITED 1 #define I915_MODE_FLAG_INHERITED (1<<0)
/* Flag to get scanline using frame time stamps */ /* Flag to get scanline using frame time stamps */
#define I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP (1<<1) #define I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP (1<<1)
/* Flag to use the scanline counter instead of the pixel counter */
#define I915_MODE_FLAG_USE_SCANLINE_COUNTER (1<<2)
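These are independent bits in drm_display_mode.private_flags (note INHERITED also changes from a bare 1 to an explicit (1<<0)), so consumers combine and test them with bitwise operations. A small self-contained sketch with an invented consumer; only the flag values are taken from the defines above:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Flag values copied from the defines above. */
#define I915_MODE_FLAG_INHERITED			(1 << 0)
#define I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP	(1 << 1)
#define I915_MODE_FLAG_USE_SCANLINE_COUNTER		(1 << 2)

/* Stand-in for drm_display_mode, which carries private_flags. */
struct fake_mode {
	uint32_t private_flags;
};

/* Invented consumer: decide how to read the current scanline for this mode. */
static bool wants_scanline_counter(const struct fake_mode *mode)
{
	return mode->private_flags & I915_MODE_FLAG_USE_SCANLINE_COUNTER;
}

int main(void)
{
	struct fake_mode mode = { .private_flags = I915_MODE_FLAG_INHERITED };

	assert(!wants_scanline_counter(&mode));
	mode.private_flags |= I915_MODE_FLAG_USE_SCANLINE_COUNTER;
	assert(wants_scanline_counter(&mode));
	return 0;
}
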
struct intel_pipe_wm { struct intel_pipe_wm {
struct intel_wm_level wm[5]; struct intel_wm_level wm[5];


@ -458,12 +458,6 @@ cleanup:
void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno) void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno)
{ {
intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
/* After manually advancing the seqno, fake the interrupt in case
* there are any waiters for that seqno.
*/
intel_engine_wakeup(engine);
GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno); GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno);
} }
@ -480,53 +474,67 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists))); GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));
GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS); GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
execlists->queue_priority = INT_MIN; execlists->queue_priority_hint = INT_MIN;
execlists->queue = RB_ROOT_CACHED; execlists->queue = RB_ROOT_CACHED;
} }
/**
* intel_engines_setup_common - setup engine state not requiring hw access
* @engine: Engine to setup.
*
* Initializes @engine@ structure members shared between legacy and execlists
* submission modes which do not require hardware access.
*
* Typically done early in the submission mode specific engine setup stage.
*/
void intel_engine_setup_common(struct intel_engine_cs *engine)
{
i915_timeline_init(engine->i915, &engine->timeline, engine->name);
i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
intel_engine_init_execlist(engine);
intel_engine_init_hangcheck(engine);
intel_engine_init_batch_pool(engine);
intel_engine_init_cmd_parser(engine);
}
static void cleanup_status_page(struct intel_engine_cs *engine) static void cleanup_status_page(struct intel_engine_cs *engine)
{ {
struct i915_vma *vma;
/* Prevent writes into HWSP after returning the page to the system */ /* Prevent writes into HWSP after returning the page to the system */
intel_engine_set_hwsp_writemask(engine, ~0u); intel_engine_set_hwsp_writemask(engine, ~0u);
if (HWS_NEEDS_PHYSICAL(engine->i915)) { vma = fetch_and_zero(&engine->status_page.vma);
void *addr = fetch_and_zero(&engine->status_page.page_addr); if (!vma)
return;
__free_page(virt_to_page(addr)); if (!HWS_NEEDS_PHYSICAL(engine->i915))
i915_vma_unpin(vma);
i915_gem_object_unpin_map(vma->obj);
__i915_gem_object_release_unless_active(vma->obj);
} }
i915_vma_unpin_and_release(&engine->status_page.vma, static int pin_ggtt_status_page(struct intel_engine_cs *engine,
I915_VMA_RELEASE_MAP); struct i915_vma *vma)
{
unsigned int flags;
flags = PIN_GLOBAL;
if (!HAS_LLC(engine->i915))
/*
* On g33, we cannot place HWS above 256MiB, so
* restrict its pinning to the low mappable arena.
* Though this restriction is not documented for
* gen4, gen5, or byt, they also behave similarly
* and hang if the HWS is placed at the top of the
* GTT. To generalise, it appears that all !llc
* platforms have issues with us placing the HWS
* above the mappable region (even though we never
* actually map it).
*/
flags |= PIN_MAPPABLE;
else
flags |= PIN_HIGH;
return i915_vma_pin(vma, 0, 0, flags);
} }
static int init_status_page(struct intel_engine_cs *engine) static int init_status_page(struct intel_engine_cs *engine)
{ {
struct drm_i915_gem_object *obj; struct drm_i915_gem_object *obj;
struct i915_vma *vma; struct i915_vma *vma;
unsigned int flags;
void *vaddr; void *vaddr;
int ret; int ret;
/*
* Though the HWS register does support 36bit addresses, historically
* we have had hangs and corruption reported due to wild writes if
* the HWS is placed above 4G. We only allow objects to be allocated
* in GFP_DMA32 for i965, and no earlier physical address users had
* access to more than 4G.
*/
obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
if (IS_ERR(obj)) { if (IS_ERR(obj)) {
DRM_ERROR("Failed to allocate status page\n"); DRM_ERROR("Failed to allocate status page\n");
@ -543,59 +551,67 @@ static int init_status_page(struct intel_engine_cs *engine)
goto err; goto err;
} }
flags = PIN_GLOBAL;
if (!HAS_LLC(engine->i915))
/* On g33, we cannot place HWS above 256MiB, so
* restrict its pinning to the low mappable arena.
* Though this restriction is not documented for
* gen4, gen5, or byt, they also behave similarly
* and hang if the HWS is placed at the top of the
* GTT. To generalise, it appears that all !llc
* platforms have issues with us placing the HWS
* above the mappable region (even though we never
* actually map it).
*/
flags |= PIN_MAPPABLE;
else
flags |= PIN_HIGH;
ret = i915_vma_pin(vma, 0, 0, flags);
if (ret)
goto err;
vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
if (IS_ERR(vaddr)) { if (IS_ERR(vaddr)) {
ret = PTR_ERR(vaddr); ret = PTR_ERR(vaddr);
goto err;
}
engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
engine->status_page.vma = vma;
if (!HWS_NEEDS_PHYSICAL(engine->i915)) {
ret = pin_ggtt_status_page(engine, vma);
if (ret)
goto err_unpin; goto err_unpin;
} }
engine->status_page.vma = vma;
engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE);
return 0; return 0;
err_unpin: err_unpin:
i915_vma_unpin(vma); i915_gem_object_unpin_map(obj);
err: err:
i915_gem_object_put(obj); i915_gem_object_put(obj);
return ret; return ret;
} }
static int init_phys_status_page(struct intel_engine_cs *engine) /**
{ * intel_engines_setup_common - setup engine state not requiring hw access
struct page *page; * @engine: Engine to setup.
*
/* * Initializes @engine@ structure members shared between legacy and execlists
* Though the HWS register does support 36bit addresses, historically * submission modes which do not require hardware access.
* we have had hangs and corruption reported due to wild writes if *
* the HWS is placed above 4G. * Typically done early in the submission mode specific engine setup stage.
*/ */
page = alloc_page(GFP_KERNEL | __GFP_DMA32 | __GFP_ZERO); int intel_engine_setup_common(struct intel_engine_cs *engine)
if (!page) {
return -ENOMEM; int err;
engine->status_page.page_addr = page_address(page); err = init_status_page(engine);
if (err)
return err;
err = i915_timeline_init(engine->i915,
&engine->timeline,
engine->name,
engine->status_page.vma);
if (err)
goto err_hwsp;
i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
intel_engine_init_breadcrumbs(engine);
intel_engine_init_execlist(engine);
intel_engine_init_hangcheck(engine);
intel_engine_init_batch_pool(engine);
intel_engine_init_cmd_parser(engine);
return 0; return 0;
err_hwsp:
cleanup_status_page(engine);
return err;
} }
static void __intel_context_unpin(struct i915_gem_context *ctx, static void __intel_context_unpin(struct i915_gem_context *ctx,
@ -604,6 +620,56 @@ static void __intel_context_unpin(struct i915_gem_context *ctx,
intel_context_unpin(to_intel_context(ctx, engine)); intel_context_unpin(to_intel_context(ctx, engine));
} }
struct measure_breadcrumb {
struct i915_request rq;
struct i915_timeline timeline;
struct intel_ring ring;
u32 cs[1024];
};
static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
{
struct measure_breadcrumb *frame;
int dw = -ENOMEM;
GEM_BUG_ON(!engine->i915->gt.scratch);
frame = kzalloc(sizeof(*frame), GFP_KERNEL);
if (!frame)
return -ENOMEM;
if (i915_timeline_init(engine->i915,
&frame->timeline, "measure",
engine->status_page.vma))
goto out_frame;
INIT_LIST_HEAD(&frame->ring.request_list);
frame->ring.timeline = &frame->timeline;
frame->ring.vaddr = frame->cs;
frame->ring.size = sizeof(frame->cs);
frame->ring.effective_size = frame->ring.size;
intel_ring_update_space(&frame->ring);
frame->rq.i915 = engine->i915;
frame->rq.engine = engine;
frame->rq.ring = &frame->ring;
frame->rq.timeline = &frame->timeline;
dw = i915_timeline_pin(&frame->timeline);
if (dw < 0)
goto out_timeline;
dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;
i915_timeline_unpin(&frame->timeline);
out_timeline:
i915_timeline_fini(&frame->timeline);
out_frame:
kfree(frame);
return dw;
}
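measure_breadcrumb_dw() runs emit_fini_breadcrumb() into a throwaway on-stack ring purely to count the dwords it emits; intel_engine_init_common() below stores the result as engine->emit_fini_breadcrumb_dw, taking over from the hard-coded gen8_emit_breadcrumb_sz constants dropped in the intel_lrc.c hunks. A self-contained sketch of what the number is for, i.e. keeping enough ring space reserved for the closing breadcrumb; the reservation site and the absence of any extra margin here are assumptions for illustration, not taken from the driver:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

typedef uint32_t u32;

/* Toy stand-ins for the structures involved. */
struct fake_engine {
	int emit_fini_breadcrumb_dw;	/* measured once at engine init */
};

struct fake_request {
	const struct fake_engine *engine;
	size_t reserved_space;		/* bytes kept free in the ring */
};

/*
 * Each request must keep enough ring space free to emit its closing
 * breadcrumb; basing this on the measured dword count tracks the
 * per-generation breadcrumb size instead of a hard-coded constant.
 */
static void reserve_fini_breadcrumb_space(struct fake_request *rq)
{
	rq->reserved_space = rq->engine->emit_fini_breadcrumb_dw * sizeof(u32);
}

int main(void)
{
	struct fake_engine engine = { .emit_fini_breadcrumb_dw = 6 };
	struct fake_request rq = { .engine = &engine, .reserved_space = 0 };

	reserve_fini_breadcrumb_space(&rq);
	assert(rq.reserved_space == 6 * sizeof(u32));
	return 0;
}
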
/** /**
* intel_engines_init_common - initialize engine state which might require hw access * @engine: Engine to initialize.
* @engine: Engine to initialize. * @engine: Engine to initialize.
@ -646,21 +712,14 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
} }
} }
ret = intel_engine_init_breadcrumbs(engine); ret = measure_breadcrumb_dw(engine);
if (ret) if (ret < 0)
goto err_unpin_preempt; goto err_unpin_preempt;
if (HWS_NEEDS_PHYSICAL(i915)) engine->emit_fini_breadcrumb_dw = ret;
ret = init_phys_status_page(engine);
else
ret = init_status_page(engine);
if (ret)
goto err_breadcrumbs;
return 0; return 0;
err_breadcrumbs:
intel_engine_fini_breadcrumbs(engine);
err_unpin_preempt: err_unpin_preempt:
if (i915->preempt_context) if (i915->preempt_context)
__intel_context_unpin(i915->preempt_context, engine); __intel_context_unpin(i915->preempt_context, engine);
@ -1071,10 +1130,8 @@ void intel_engines_sanitize(struct drm_i915_private *i915, bool force)
if (!reset_engines(i915) && !force) if (!reset_engines(i915) && !force)
return; return;
for_each_engine(engine, i915, id) { for_each_engine(engine, i915, id)
if (engine->reset.reset) intel_engine_reset(engine, false);
engine->reset.reset(engine, NULL);
}
} }
/** /**
@ -1110,7 +1167,7 @@ void intel_engines_park(struct drm_i915_private *i915)
} }
/* Must be reset upon idling, or we may miss the busy wakeup. */ /* Must be reset upon idling, or we may miss the busy wakeup. */
GEM_BUG_ON(engine->execlists.queue_priority != INT_MIN); GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
if (engine->park) if (engine->park)
engine->park(engine); engine->park(engine);
@ -1226,10 +1283,14 @@ static void print_request(struct drm_printer *m,
x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf)); x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf));
drm_printf(m, "%s%x%s [%llx:%llx]%s @ %dms: %s\n", drm_printf(m, "%s%x%s%s [%llx:%llx]%s @ %dms: %s\n",
prefix, prefix,
rq->global_seqno, rq->global_seqno,
i915_request_completed(rq) ? "!" : "", i915_request_completed(rq) ? "!" :
i915_request_started(rq) ? "*" :
"",
test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
&rq->fence.flags) ? "+" : "",
rq->fence.context, rq->fence.seqno, rq->fence.context, rq->fence.seqno,
buf, buf,
jiffies_to_msecs(jiffies - rq->emitted_jiffies), jiffies_to_msecs(jiffies - rq->emitted_jiffies),
@ -1320,7 +1381,8 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
} }
if (HAS_EXECLISTS(dev_priv)) { if (HAS_EXECLISTS(dev_priv)) {
const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; const u32 *hws =
&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
unsigned int idx; unsigned int idx;
u8 read, write; u8 read, write;
@ -1363,9 +1425,10 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
char hdr[80]; char hdr[80];
snprintf(hdr, sizeof(hdr), snprintf(hdr, sizeof(hdr),
"\t\tELSP[%d] count=%d, ring->start=%08x, rq: ", "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x}, rq: ",
idx, count, idx, count,
i915_ggtt_offset(rq->ring->vma)); i915_ggtt_offset(rq->ring->vma),
rq->timeline->hwsp_offset);
print_request(m, rq, hdr); print_request(m, rq, hdr);
} else { } else {
drm_printf(m, "\t\tELSP[%d] idle\n", idx); drm_printf(m, "\t\tELSP[%d] idle\n", idx);
@ -1420,12 +1483,9 @@ void intel_engine_dump(struct intel_engine_cs *engine,
struct drm_printer *m, struct drm_printer *m,
const char *header, ...) const char *header, ...)
{ {
struct intel_breadcrumbs * const b = &engine->breadcrumbs;
struct i915_gpu_error * const error = &engine->i915->gpu_error; struct i915_gpu_error * const error = &engine->i915->gpu_error;
struct i915_request *rq; struct i915_request *rq;
intel_wakeref_t wakeref; intel_wakeref_t wakeref;
unsigned long flags;
struct rb_node *rb;
if (header) { if (header) {
va_list ap; va_list ap;
@ -1475,6 +1535,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
rq->ring->emit); rq->ring->emit);
drm_printf(m, "\t\tring->space: 0x%08x\n", drm_printf(m, "\t\tring->space: 0x%08x\n",
rq->ring->space); rq->ring->space);
drm_printf(m, "\t\tring->hwsp: 0x%08x\n",
rq->timeline->hwsp_offset);
print_request_ring(m, rq); print_request_ring(m, rq);
} }
@ -1491,21 +1553,12 @@ void intel_engine_dump(struct intel_engine_cs *engine,
intel_execlists_show_requests(engine, m, print_request, 8); intel_execlists_show_requests(engine, m, print_request, 8);
spin_lock_irqsave(&b->rb_lock, flags);
for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
struct intel_wait *w = rb_entry(rb, typeof(*w), node);
drm_printf(m, "\t%s [%d:%c] waiting for %x\n",
w->tsk->comm, w->tsk->pid,
task_state_to_char(w->tsk),
w->seqno);
}
spin_unlock_irqrestore(&b->rb_lock, flags);
drm_printf(m, "HWSP:\n"); drm_printf(m, "HWSP:\n");
hexdump(m, engine->status_page.page_addr, PAGE_SIZE); hexdump(m, engine->status_page.addr, PAGE_SIZE);
drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine))); drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine)));
intel_engine_print_breadcrumbs(engine, m);
} }
static u8 user_class_map[] = { static u8 user_class_map[] = {


@ -112,7 +112,6 @@
#define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */ #define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */
#define MI_USE_GGTT (1 << 22) /* g4x+ */ #define MI_USE_GGTT (1 << 22) /* g4x+ */
#define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) #define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1)
#define MI_STORE_DWORD_INDEX_SHIFT 2
/* /*
* Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM: * Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM:
* - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw * - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw


@ -81,6 +81,12 @@
* *
*/ */
static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
{
return (i915_ggtt_offset(engine->status_page.vma) +
I915_GEM_HWS_PREEMPT_ADDR);
}
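This helper, like intel_hws_seqno_address() in the intel_lrc.c hunk further down, is just "GGTT offset of the status page vma plus a fixed byte offset for one dword slot". A self-contained sketch of that addressing; the slot indices here are assumed example values, the real ones are the I915_GEM_HWS_* defines:

#include <assert.h>
#include <stdint.h>

typedef uint32_t u32;

/* Assumed example slot indices; the real ones are the I915_GEM_HWS_* defines. */
#define HWS_SEQNO_INDEX		0x30	/* global seqno slot */
#define HWS_PREEMPT_INDEX	0x32	/* GuC preempt-done handshake slot */

/* The status page is one 4KiB page of u32 slots mapped through the GGTT. */
static u32 hws_slot_address(u32 status_page_ggtt_offset, u32 slot)
{
	return status_page_ggtt_offset + slot * sizeof(u32);
}

int main(void)
{
	/* Two slots on the same page differ only by their dword offsets. */
	assert(hws_slot_address(0x1000, HWS_PREEMPT_INDEX) -
	       hws_slot_address(0x1000, HWS_SEQNO_INDEX) ==
	       (HWS_PREEMPT_INDEX - HWS_SEQNO_INDEX) * sizeof(u32));
	return 0;
}
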
static inline struct i915_priolist *to_priolist(struct rb_node *rb) static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{ {
return rb_entry(rb, struct i915_priolist, node); return rb_entry(rb, struct i915_priolist, node);
@ -623,6 +629,8 @@ static void inject_preempt_context(struct work_struct *work)
EXECLISTS_ACTIVE_PREEMPT); EXECLISTS_ACTIVE_PREEMPT);
tasklet_schedule(&engine->execlists.tasklet); tasklet_schedule(&engine->execlists.tasklet);
} }
(void)I915_SELFTEST_ONLY(engine->execlists.preempt_hang.count++);
} }
/* /*
@ -666,7 +674,7 @@ static void complete_preempt_context(struct intel_engine_cs *engine)
execlists_unwind_incomplete_requests(execlists); execlists_unwind_incomplete_requests(execlists);
wait_for_guc_preempt_report(engine); wait_for_guc_preempt_report(engine);
intel_write_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX, 0); intel_write_status_page(engine, I915_GEM_HWS_PREEMPT, 0);
} }
/** /**
@ -731,7 +739,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
if (intel_engine_has_preemption(engine)) { if (intel_engine_has_preemption(engine)) {
struct guc_preempt_work *preempt_work = struct guc_preempt_work *preempt_work =
&engine->i915->guc.preempt_work[engine->id]; &engine->i915->guc.preempt_work[engine->id];
int prio = execlists->queue_priority; int prio = execlists->queue_priority_hint;
if (__execlists_need_preempt(prio, port_prio(port))) { if (__execlists_need_preempt(prio, port_prio(port))) {
execlists_set_active(execlists, execlists_set_active(execlists,
@ -777,7 +785,8 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
kmem_cache_free(engine->i915->priorities, p); kmem_cache_free(engine->i915->priorities, p);
} }
done: done:
execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN; execlists->queue_priority_hint =
rb ? to_priolist(rb)->priority : INT_MIN;
if (submit) if (submit)
port_assign(port, last); port_assign(port, last);
if (last) if (last)
@ -824,7 +833,7 @@ static void guc_submission_tasklet(unsigned long data)
} }
if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) && if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) &&
intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) == intel_read_status_page(engine, I915_GEM_HWS_PREEMPT) ==
GUC_PREEMPT_FINISHED) GUC_PREEMPT_FINISHED)
complete_preempt_context(engine); complete_preempt_context(engine);
@ -834,8 +843,7 @@ static void guc_submission_tasklet(unsigned long data)
spin_unlock_irqrestore(&engine->timeline.lock, flags); spin_unlock_irqrestore(&engine->timeline.lock, flags);
} }
static struct i915_request * static void guc_reset_prepare(struct intel_engine_cs *engine)
guc_reset_prepare(struct intel_engine_cs *engine)
{ {
struct intel_engine_execlists * const execlists = &engine->execlists; struct intel_engine_execlists * const execlists = &engine->execlists;
@ -861,8 +869,6 @@ guc_reset_prepare(struct intel_engine_cs *engine)
*/ */
if (engine->i915->guc.preempt_wq) if (engine->i915->guc.preempt_wq)
flush_workqueue(engine->i915->guc.preempt_wq); flush_workqueue(engine->i915->guc.preempt_wq);
return i915_gem_find_active_request(engine);
} }
/* /*


@ -25,6 +25,17 @@
#include "i915_drv.h" #include "i915_drv.h"
#include "i915_reset.h" #include "i915_reset.h"
struct hangcheck {
u64 acthd;
u32 seqno;
enum intel_engine_hangcheck_action action;
unsigned long action_timestamp;
int deadlock;
struct intel_instdone instdone;
bool wedged:1;
bool stalled:1;
};
static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone) static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)
{ {
u32 tmp = current_instdone | *old_instdone; u32 tmp = current_instdone | *old_instdone;
@ -119,25 +130,22 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
} }
static void hangcheck_load_sample(struct intel_engine_cs *engine, static void hangcheck_load_sample(struct intel_engine_cs *engine,
struct intel_engine_hangcheck *hc) struct hangcheck *hc)
{ {
hc->acthd = intel_engine_get_active_head(engine); hc->acthd = intel_engine_get_active_head(engine);
hc->seqno = intel_engine_get_seqno(engine); hc->seqno = intel_engine_get_seqno(engine);
} }
static void hangcheck_store_sample(struct intel_engine_cs *engine, static void hangcheck_store_sample(struct intel_engine_cs *engine,
const struct intel_engine_hangcheck *hc) const struct hangcheck *hc)
{ {
engine->hangcheck.acthd = hc->acthd; engine->hangcheck.acthd = hc->acthd;
engine->hangcheck.seqno = hc->seqno; engine->hangcheck.seqno = hc->seqno;
engine->hangcheck.action = hc->action;
engine->hangcheck.stalled = hc->stalled;
engine->hangcheck.wedged = hc->wedged;
} }
static enum intel_engine_hangcheck_action static enum intel_engine_hangcheck_action
hangcheck_get_action(struct intel_engine_cs *engine, hangcheck_get_action(struct intel_engine_cs *engine,
const struct intel_engine_hangcheck *hc) const struct hangcheck *hc)
{ {
if (engine->hangcheck.seqno != hc->seqno) if (engine->hangcheck.seqno != hc->seqno)
return ENGINE_ACTIVE_SEQNO; return ENGINE_ACTIVE_SEQNO;
@ -149,7 +157,7 @@ hangcheck_get_action(struct intel_engine_cs *engine,
} }
static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, static void hangcheck_accumulate_sample(struct intel_engine_cs *engine,
struct intel_engine_hangcheck *hc) struct hangcheck *hc)
{ {
unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT; unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT;
@ -265,19 +273,21 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
intel_uncore_arm_unclaimed_mmio_detection(dev_priv); intel_uncore_arm_unclaimed_mmio_detection(dev_priv);
for_each_engine(engine, dev_priv, id) { for_each_engine(engine, dev_priv, id) {
struct intel_engine_hangcheck hc; struct hangcheck hc;
intel_engine_signal_breadcrumbs(engine);
hangcheck_load_sample(engine, &hc); hangcheck_load_sample(engine, &hc);
hangcheck_accumulate_sample(engine, &hc); hangcheck_accumulate_sample(engine, &hc);
hangcheck_store_sample(engine, &hc); hangcheck_store_sample(engine, &hc);
if (engine->hangcheck.stalled) { if (hc.stalled) {
hung |= intel_engine_flag(engine); hung |= intel_engine_flag(engine);
if (hc.action != ENGINE_DEAD) if (hc.action != ENGINE_DEAD)
stuck |= intel_engine_flag(engine); stuck |= intel_engine_flag(engine);
} }
if (engine->hangcheck.wedged) if (hc.wedged)
wedged |= intel_engine_flag(engine); wedged |= intel_engine_flag(engine);
} }


@ -136,6 +136,7 @@
#include <drm/i915_drm.h> #include <drm/i915_drm.h>
#include "i915_drv.h" #include "i915_drv.h"
#include "i915_gem_render_state.h" #include "i915_gem_render_state.h"
#include "i915_reset.h"
#include "i915_vgpu.h" #include "i915_vgpu.h"
#include "intel_lrc_reg.h" #include "intel_lrc_reg.h"
#include "intel_mocs.h" #include "intel_mocs.h"
@ -171,6 +172,12 @@ static void execlists_init_reg_state(u32 *reg_state,
struct intel_engine_cs *engine, struct intel_engine_cs *engine,
struct intel_ring *ring); struct intel_ring *ring);
static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
{
return (i915_ggtt_offset(engine->status_page.vma) +
I915_GEM_HWS_INDEX_ADDR);
}
static inline struct i915_priolist *to_priolist(struct rb_node *rb) static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{ {
return rb_entry(rb, struct i915_priolist, node); return rb_entry(rb, struct i915_priolist, node);
@ -181,13 +188,90 @@ static inline int rq_prio(const struct i915_request *rq)
return rq->sched.attr.priority; return rq->sched.attr.priority;
} }
static inline bool need_preempt(const struct intel_engine_cs *engine, static int queue_prio(const struct intel_engine_execlists *execlists)
const struct i915_request *last,
int prio)
{ {
return (intel_engine_has_preemption(engine) && struct i915_priolist *p;
__execlists_need_preempt(prio, rq_prio(last)) && struct rb_node *rb;
!i915_request_completed(last));
rb = rb_first_cached(&execlists->queue);
if (!rb)
return INT_MIN;
/*
* As the priolist[] is inverted, with the highest priority in [0],
* we have to flip the index value to obtain the priority.
*/
p = to_priolist(rb);
return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
}
static inline bool need_preempt(const struct intel_engine_cs *engine,
const struct i915_request *rq)
{
const int last_prio = rq_prio(rq);
if (!intel_engine_has_preemption(engine))
return false;
if (i915_request_completed(rq))
return false;
/*
* Check if the current priority hint merits a preemption attempt.
*
* We record the highest value priority we saw during rescheduling
* prior to this dequeue, therefore we know that if it is strictly
* less than the current tail of ELSP[0], we do not need to force
* a preempt-to-idle cycle.
*
* However, the priority hint is a mere hint that we may need to
* preempt. If that hint is stale or we may be trying to preempt
* ourselves, ignore the request.
*/
if (!__execlists_need_preempt(engine->execlists.queue_priority_hint,
last_prio))
return false;
/*
* Check against the first request in ELSP[1], it will, thanks to the
* power of PI, be the highest priority of that context.
*/
if (!list_is_last(&rq->link, &engine->timeline.requests) &&
rq_prio(list_next_entry(rq, link)) > last_prio)
return true;
/*
* If the inflight context did not trigger the preemption, then maybe
* it was the set of queued requests? Pick the highest priority in
* the queue (the first active priolist) and see if it deserves to be
* running instead of ELSP[0].
*
* The highest priority request in the queue cannot be either
* ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
* context, its priority would not exceed ELSP[0] aka last_prio.
*/
return queue_prio(&engine->execlists) > last_prio;
}
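queue_prio() above packs the priolist bucket priority and its sub-level bitmask into a single comparable integer. A worked example of that formula, assuming I915_USER_PRIORITY_SHIFT is 2 (the real value comes from the scheduler headers); ffs() returns the lowest set bit, i.e. the most urgent sub-level in use:

#include <assert.h>
#include <strings.h>	/* ffs() */

/* Assumed for illustration; the real shift lives in the scheduler headers. */
#define I915_USER_PRIORITY_SHIFT 2

/* Effective priority of the first (highest) priolist bucket, as in queue_prio(). */
static int effective_queue_prio(int bucket_priority, unsigned int used_mask)
{
	return ((bucket_priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(used_mask);
}

int main(void)
{
	/*
	 * Sub-level 0 is the most urgent: bit 0 set gives ffs() == 1 and
	 * therefore the highest effective priority within a bucket.
	 */
	assert(effective_queue_prio(0, 0x1) == 3);	/* (0 + 1) << 2, minus 1 */
	assert(effective_queue_prio(0, 0x4) == 1);	/* (0 + 1) << 2, minus 3 */

	/* A higher bucket always beats a lower one, whatever the sub-level. */
	assert(effective_queue_prio(1, 0x8) > effective_queue_prio(0, 0x1));
	return 0;
}
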
__maybe_unused static inline bool
assert_priority_queue(const struct intel_engine_execlists *execlists,
const struct i915_request *prev,
const struct i915_request *next)
{
if (!prev)
return true;
/*
* Without preemption, the prev may refer to the still active element
* which we refuse to let go.
*
* Even with preemption, there are times when we think it is better not
* to preempt and leave an ostensibly lower priority request in flight.
*/
if (port_request(execlists->port) == prev)
return true;
return rq_prio(prev) >= rq_prio(next);
} }
/* /*
@ -264,7 +348,8 @@ static void unwind_wa_tail(struct i915_request *rq)
assert_ring_tail_valid(rq->ring, rq->tail); assert_ring_tail_valid(rq->ring, rq->tail);
} }
static void __unwind_incomplete_requests(struct intel_engine_cs *engine) static struct i915_request *
__unwind_incomplete_requests(struct intel_engine_cs *engine)
{ {
struct i915_request *rq, *rn, *active = NULL; struct i915_request *rq, *rn, *active = NULL;
struct list_head *uninitialized_var(pl); struct list_head *uninitialized_var(pl);
@ -306,6 +391,8 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
list_move_tail(&active->sched.link, list_move_tail(&active->sched.link,
i915_sched_lookup_priolist(engine, prio)); i915_sched_lookup_priolist(engine, prio));
} }
return active;
} }
void void
@ -436,11 +523,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
desc = execlists_update_context(rq); desc = execlists_update_context(rq);
GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n",
engine->name, n, engine->name, n,
port[n].context_id, count, port[n].context_id, count,
rq->global_seqno, rq->global_seqno,
rq->fence.context, rq->fence.seqno, rq->fence.context, rq->fence.seqno,
hwsp_seqno(rq),
intel_engine_get_seqno(engine), intel_engine_get_seqno(engine),
rq_prio(rq)); rq_prio(rq));
} else { } else {
@ -512,6 +600,8 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT); execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
} }
static void complete_preempt_context(struct intel_engine_execlists *execlists) static void complete_preempt_context(struct intel_engine_execlists *execlists)
@ -580,7 +670,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
return; return;
if (need_preempt(engine, last, execlists->queue_priority)) { if (need_preempt(engine, last)) {
inject_preempt_context(engine); inject_preempt_context(engine);
return; return;
} }
@ -613,7 +703,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* WaIdleLiteRestore:bdw,skl * WaIdleLiteRestore:bdw,skl
* Apply the wa NOOPs to prevent * Apply the wa NOOPs to prevent
* ring:HEAD == rq:TAIL as we resubmit the * ring:HEAD == rq:TAIL as we resubmit the
* request. See gen8_emit_breadcrumb() for * request. See gen8_emit_fini_breadcrumb() for
* where we prepare the padding after the * where we prepare the padding after the
* end of the request. * end of the request.
*/ */
@ -626,8 +716,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
int i; int i;
priolist_for_each_request_consume(rq, rn, p, i) { priolist_for_each_request_consume(rq, rn, p, i) {
GEM_BUG_ON(last && GEM_BUG_ON(!assert_priority_queue(execlists, last, rq));
need_preempt(engine, last, rq_prio(rq)));
/* /*
* Can we combine this request with the current port? * Can we combine this request with the current port?
@ -688,20 +777,20 @@ done:
/* /*
* Here be a bit of magic! Or sleight-of-hand, whichever you prefer. * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
* *
* We choose queue_priority such that if we add a request of greater * We choose the priority hint such that if we add a request of greater
* priority than this, we kick the submission tasklet to decide on * priority than this, we kick the submission tasklet to decide on
* the right order of submitting the requests to hardware. We must * the right order of submitting the requests to hardware. We must
* also be prepared to reorder requests as they are in-flight on the * also be prepared to reorder requests as they are in-flight on the
* HW. We derive the queue_priority then as the first "hole" in * HW. We derive the priority hint then as the first "hole" in
* the HW submission ports and if there are no available slots, * the HW submission ports and if there are no available slots,
* the priority of the lowest executing request, i.e. last. * the priority of the lowest executing request, i.e. last.
* *
* When we do receive a higher priority request ready to run from the * When we do receive a higher priority request ready to run from the
* user, see queue_request(), the queue_priority is bumped to that * user, see queue_request(), the priority hint is bumped to that
* request triggering preemption on the next dequeue (or subsequent * request triggering preemption on the next dequeue (or subsequent
* interrupt for secondary ports). * interrupt for secondary ports).
*/ */
execlists->queue_priority = execlists->queue_priority_hint =
port != execlists->port ? rq_prio(last) : INT_MIN; port != execlists->port ? rq_prio(last) : INT_MIN;
if (submit) { if (submit) {
@ -732,11 +821,12 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
while (num_ports-- && port_isset(port)) { while (num_ports-- && port_isset(port)) {
struct i915_request *rq = port_request(port); struct i915_request *rq = port_request(port);
GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d)\n", GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d:%d)\n",
rq->engine->name, rq->engine->name,
(unsigned int)(port - execlists->port), (unsigned int)(port - execlists->port),
rq->global_seqno, rq->global_seqno,
rq->fence.context, rq->fence.seqno, rq->fence.context, rq->fence.seqno,
hwsp_seqno(rq),
intel_engine_get_seqno(rq->engine)); intel_engine_get_seqno(rq->engine));
GEM_BUG_ON(!execlists->active); GEM_BUG_ON(!execlists->active);
@ -820,10 +910,10 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
list_for_each_entry(rq, &engine->timeline.requests, link) { list_for_each_entry(rq, &engine->timeline.requests, link) {
GEM_BUG_ON(!rq->global_seqno); GEM_BUG_ON(!rq->global_seqno);
if (i915_request_signaled(rq)) if (!i915_request_signaled(rq))
continue;
dma_fence_set_error(&rq->fence, -EIO); dma_fence_set_error(&rq->fence, -EIO);
i915_request_mark_complete(rq);
} }
/* Flush the queued requests to the timeline list (for retiring). */ /* Flush the queued requests to the timeline list (for retiring). */
@ -833,9 +923,9 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
priolist_for_each_request_consume(rq, rn, p, i) { priolist_for_each_request_consume(rq, rn, p, i) {
list_del_init(&rq->sched.link); list_del_init(&rq->sched.link);
dma_fence_set_error(&rq->fence, -EIO);
__i915_request_submit(rq); __i915_request_submit(rq);
dma_fence_set_error(&rq->fence, -EIO);
i915_request_mark_complete(rq);
} }
rb_erase_cached(&p->node, &execlists->queue); rb_erase_cached(&p->node, &execlists->queue);
@ -849,7 +939,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
/* Remaining _unready_ requests will be nop'ed when submitted */ /* Remaining _unready_ requests will be nop'ed when submitted */
execlists->queue_priority = INT_MIN; execlists->queue_priority_hint = INT_MIN;
execlists->queue = RB_ROOT_CACHED; execlists->queue = RB_ROOT_CACHED;
GEM_BUG_ON(port_isset(execlists->port)); GEM_BUG_ON(port_isset(execlists->port));
@ -872,6 +962,8 @@ static void process_csb(struct intel_engine_cs *engine)
const u32 * const buf = execlists->csb_status; const u32 * const buf = execlists->csb_status;
u8 head, tail; u8 head, tail;
lockdep_assert_held(&engine->timeline.lock);
/* /*
* Note that csb_write, csb_status may be either in HWSP or mmio. * Note that csb_write, csb_status may be either in HWSP or mmio.
* When reading from the csb_write mmio register, we have to be * When reading from the csb_write mmio register, we have to be
@ -960,12 +1052,13 @@ static void process_csb(struct intel_engine_cs *engine)
EXECLISTS_ACTIVE_USER)); EXECLISTS_ACTIVE_USER));
rq = port_unpack(port, &count); rq = port_unpack(port, &count);
GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n",
engine->name, engine->name,
port->context_id, count, port->context_id, count,
rq ? rq->global_seqno : 0, rq ? rq->global_seqno : 0,
rq ? rq->fence.context : 0, rq ? rq->fence.context : 0,
rq ? rq->fence.seqno : 0, rq ? rq->fence.seqno : 0,
rq ? hwsp_seqno(rq) : 0,
intel_engine_get_seqno(engine), intel_engine_get_seqno(engine),
rq ? rq_prio(rq) : 0); rq ? rq_prio(rq) : 0);
@ -1079,8 +1172,8 @@ static void __submit_queue_imm(struct intel_engine_cs *engine)
static void submit_queue(struct intel_engine_cs *engine, int prio) static void submit_queue(struct intel_engine_cs *engine, int prio)
{ {
if (prio > engine->execlists.queue_priority) { if (prio > engine->execlists.queue_priority_hint) {
engine->execlists.queue_priority = prio; engine->execlists.queue_priority_hint = prio;
__submit_queue_imm(engine); __submit_queue_imm(engine);
} }
} }
@ -1173,6 +1266,24 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
return i915_vma_pin(vma, 0, 0, flags); return i915_vma_pin(vma, 0, 0, flags);
} }
static u32 make_rpcs(struct drm_i915_private *dev_priv);
static void
__execlists_update_reg_state(struct intel_engine_cs *engine,
struct intel_context *ce)
{
u32 *regs = ce->lrc_reg_state;
struct intel_ring *ring = ce->ring;
regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ring->vma);
regs[CTX_RING_HEAD + 1] = ring->head;
regs[CTX_RING_TAIL + 1] = ring->tail;
/* RPCS */
if (engine->class == RENDER_CLASS)
regs[CTX_R_PWR_CLK_STATE + 1] = make_rpcs(engine->i915);
}
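The +1 in every regs[CTX_* + 1] assignment reflects the layout of the logical ring context image: it is a sequence of (register offset, value) dword pairs consumed as MI_LOAD_REGISTER_IMM, so the slot after each CTX_* index holds the value to load. A toy sketch of that layout with made-up indices:

#include <assert.h>
#include <stdint.h>

typedef uint32_t u32;

/*
 * Made-up slot indices: the context image is a list of
 * (register offset, value) dword pairs, which is why the driver
 * always writes to regs[CTX_FOO + 1].
 */
#define CTX_RING_HEAD	4	/* slot holding the RING_HEAD register offset */
#define CTX_RING_TAIL	6	/* slot holding the RING_TAIL register offset */

static void update_reg_state(u32 *regs, u32 head, u32 tail)
{
	regs[CTX_RING_HEAD + 1] = head;	/* the value half of the pair */
	regs[CTX_RING_TAIL + 1] = tail;
}

int main(void)
{
	u32 regs[16] = { 0 };

	update_reg_state(regs, 0x40, 0x80);
	assert(regs[CTX_RING_HEAD + 1] == 0x40);
	assert(regs[CTX_RING_TAIL + 1] == 0x80);
	return 0;
}
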
static struct intel_context * static struct intel_context *
__execlists_context_pin(struct intel_engine_cs *engine, __execlists_context_pin(struct intel_engine_cs *engine,
struct i915_gem_context *ctx, struct i915_gem_context *ctx,
@ -1211,10 +1322,8 @@ __execlists_context_pin(struct intel_engine_cs *engine,
GEM_BUG_ON(!intel_ring_offset_valid(ce->ring, ce->ring->head)); GEM_BUG_ON(!intel_ring_offset_valid(ce->ring, ce->ring->head));
ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
i915_ggtt_offset(ce->ring->vma); __execlists_update_reg_state(engine, ce);
ce->lrc_reg_state[CTX_RING_HEAD + 1] = ce->ring->head;
ce->lrc_reg_state[CTX_RING_TAIL + 1] = ce->ring->tail;
ce->state->obj->pin_global++; ce->state->obj->pin_global++;
i915_gem_context_get(ctx); i915_gem_context_get(ctx);
@ -1254,6 +1363,34 @@ execlists_context_pin(struct intel_engine_cs *engine,
return __execlists_context_pin(engine, ctx, ce); return __execlists_context_pin(engine, ctx, ce);
} }
static int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
u32 *cs;
GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb);
cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
/*
* Check if we have been preempted before we even get started.
*
* After this point i915_request_started() reports true, even if
* we get preempted and so are no longer running.
*/
*cs++ = MI_ARB_CHECK;
*cs++ = MI_NOOP;
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = rq->timeline->hwsp_offset;
*cs++ = 0;
*cs++ = rq->fence.seqno - 1;
intel_ring_advance(rq, cs);
return 0;
}
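The initial breadcrumb stores fence.seqno - 1 into the request's slot of the timeline HWSP, so "has this request started?" can be answered from memory alone: once the CS has executed that store, the HWSP value has passed seqno - 1 even though the request has not yet signalled. A self-contained sketch of the comparison; the helpers mirror hwsp_seqno()/i915_request_started() in spirit, but the wrap-safe (s32)(a - b) >= 0 test is the usual idiom assumed here, not copied from the driver:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

typedef uint32_t u32;
typedef int32_t s32;

/* Wrap-safe seqno comparison (the usual (s32)(a - b) >= 0 idiom). */
static bool seqno_passed(u32 cur, u32 target)
{
	return (s32)(cur - target) >= 0;
}

struct fake_request {
	u32 fence_seqno;	/* seqno the request writes when it completes */
	const u32 *hwsp;	/* its slot in the per-timeline status page */
};

/* True once the init breadcrumb (which stores fence_seqno - 1) has executed. */
static bool request_started(const struct fake_request *rq)
{
	return seqno_passed(*rq->hwsp, rq->fence_seqno - 1);
}

int main(void)
{
	u32 hwsp = 0;
	struct fake_request rq = { .fence_seqno = 2, .hwsp = &hwsp };

	assert(!request_started(&rq));	/* nothing executed yet */
	hwsp = rq.fence_seqno - 1;	/* CS ran gen8_emit_init_breadcrumb() */
	assert(request_started(&rq));	/* started, but not yet completed */
	return 0;
}
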
static int emit_pdps(struct i915_request *rq) static int emit_pdps(struct i915_request *rq)
{ {
const struct intel_engine_cs * const engine = rq->engine; const struct intel_engine_cs * const engine = rq->engine;
@ -1679,7 +1816,7 @@ static void enable_execlists(struct intel_engine_cs *engine)
_MASKED_BIT_DISABLE(STOP_RING)); _MASKED_BIT_DISABLE(STOP_RING));
I915_WRITE(RING_HWS_PGA(engine->mmio_base), I915_WRITE(RING_HWS_PGA(engine->mmio_base),
engine->status_page.ggtt_offset); i915_ggtt_offset(engine->status_page.vma));
POSTING_READ(RING_HWS_PGA(engine->mmio_base)); POSTING_READ(RING_HWS_PGA(engine->mmio_base));
} }
@ -1716,11 +1853,9 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
return 0; return 0;
} }
static struct i915_request * static void execlists_reset_prepare(struct intel_engine_cs *engine)
execlists_reset_prepare(struct intel_engine_cs *engine)
{ {
struct intel_engine_execlists * const execlists = &engine->execlists; struct intel_engine_execlists * const execlists = &engine->execlists;
struct i915_request *request, *active;
unsigned long flags; unsigned long flags;
GEM_TRACE("%s: depth<-%d\n", engine->name, GEM_TRACE("%s: depth<-%d\n", engine->name,
@ -1736,59 +1871,21 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
* prevents the race. * prevents the race.
*/ */
__tasklet_disable_sync_once(&execlists->tasklet); __tasklet_disable_sync_once(&execlists->tasklet);
GEM_BUG_ON(!reset_in_progress(execlists));
/* And flush any current direct submission. */
spin_lock_irqsave(&engine->timeline.lock, flags); spin_lock_irqsave(&engine->timeline.lock, flags);
process_csb(engine); /* drain preemption events */
/*
* We want to flush the pending context switches, having disabled
* the tasklet above, we can assume exclusive access to the execlists.
* This allows us to catch up with an inflight preemption event,
* and avoid blaming an innocent request if the stall was due to the
* preemption itself.
*/
process_csb(engine);
/*
* The last active request can then be no later than the last request
* now in ELSP[0]. So search backwards from there, so that if the GPU
* has advanced beyond the last CSB update, it will be pardoned.
*/
active = NULL;
request = port_request(execlists->port);
if (request) {
/*
* Prevent the breadcrumb from advancing before we decide
* which request is currently active.
*/
intel_engine_stop_cs(engine);
list_for_each_entry_from_reverse(request,
&engine->timeline.requests,
link) {
if (__i915_request_completed(request,
request->global_seqno))
break;
active = request;
}
}
spin_unlock_irqrestore(&engine->timeline.lock, flags); spin_unlock_irqrestore(&engine->timeline.lock, flags);
return active;
} }
static void execlists_reset(struct intel_engine_cs *engine, static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
struct i915_request *request)
{ {
struct intel_engine_execlists * const execlists = &engine->execlists; struct intel_engine_execlists * const execlists = &engine->execlists;
struct i915_request *rq;
unsigned long flags; unsigned long flags;
u32 *regs; u32 *regs;
GEM_TRACE("%s request global=%d, current=%d\n",
engine->name, request ? request->global_seqno : 0,
intel_engine_get_seqno(engine));
spin_lock_irqsave(&engine->timeline.lock, flags); spin_lock_irqsave(&engine->timeline.lock, flags);
/* /*
@ -1803,12 +1900,18 @@ static void execlists_reset(struct intel_engine_cs *engine,
execlists_cancel_port_requests(execlists); execlists_cancel_port_requests(execlists);
/* Push back any incomplete requests for replay after the reset. */ /* Push back any incomplete requests for replay after the reset. */
__unwind_incomplete_requests(engine); rq = __unwind_incomplete_requests(engine);
/* Following the reset, we need to reload the CSB read/write pointers */ /* Following the reset, we need to reload the CSB read/write pointers */
reset_csb_pointers(&engine->execlists); reset_csb_pointers(&engine->execlists);
spin_unlock_irqrestore(&engine->timeline.lock, flags); GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n",
engine->name,
rq ? rq->global_seqno : 0,
intel_engine_get_seqno(engine),
yesno(stalled));
if (!rq)
goto out_unlock;
/* /*
* If the request was innocent, we leave the request in the ELSP * If the request was innocent, we leave the request in the ELSP
@ -1821,8 +1924,9 @@ static void execlists_reset(struct intel_engine_cs *engine,
* and have to at least restore the RING register in the context * and have to at least restore the RING register in the context
* image back to the expected values to skip over the guilty request. * image back to the expected values to skip over the guilty request.
*/ */
if (!request || request->fence.error != -EIO) i915_reset_request(rq, stalled);
return; if (!stalled)
goto out_unlock;
/* /*
* We want a simple context + ring to execute the breadcrumb update. * We want a simple context + ring to execute the breadcrumb update.
@ -1832,25 +1936,22 @@ static void execlists_reset(struct intel_engine_cs *engine,
* future request will be after userspace has had the opportunity * future request will be after userspace has had the opportunity
* to recreate its own state. * to recreate its own state.
*/ */
regs = request->hw_context->lrc_reg_state; regs = rq->hw_context->lrc_reg_state;
if (engine->pinned_default_state) { if (engine->pinned_default_state) {
memcpy(regs, /* skip restoring the vanilla PPHWSP */ memcpy(regs, /* skip restoring the vanilla PPHWSP */
engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
engine->context_size - PAGE_SIZE); engine->context_size - PAGE_SIZE);
} }
execlists_init_reg_state(regs,
request->gem_context, engine, request->ring);
/* Move the RING_HEAD onto the breadcrumb, past the hanging batch */ /* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma); rq->ring->head = intel_ring_wrap(rq->ring, rq->postfix);
intel_ring_update_space(rq->ring);
request->ring->head = intel_ring_wrap(request->ring, request->postfix); execlists_init_reg_state(regs, rq->gem_context, engine, rq->ring);
regs[CTX_RING_HEAD + 1] = request->ring->head; __execlists_update_reg_state(engine, rq->hw_context);
intel_ring_update_space(request->ring); out_unlock:
spin_unlock_irqrestore(&engine->timeline.lock, flags);
/* Reset WaIdleLiteRestore:bdw,skl as well */
unwind_wa_tail(request);
} }
static void execlists_reset_finish(struct intel_engine_cs *engine) static void execlists_reset_finish(struct intel_engine_cs *engine)
@ -1863,6 +1964,7 @@ static void execlists_reset_finish(struct intel_engine_cs *engine)
* to sleep before we restart and reload a context. * to sleep before we restart and reload a context.
* *
*/ */
GEM_BUG_ON(!reset_in_progress(execlists));
if (!RB_EMPTY_ROOT(&execlists->queue.rb_root)) if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
execlists->tasklet.func(execlists->tasklet.data); execlists->tasklet.func(execlists->tasklet.data);
@ -2035,53 +2137,62 @@ static int gen8_emit_flush_render(struct i915_request *request,
* used as a workaround for not being allowed to do lite * used as a workaround for not being allowed to do lite
* restore with HEAD==TAIL (WaIdleLiteRestore). * restore with HEAD==TAIL (WaIdleLiteRestore).
*/ */
static void gen8_emit_wa_tail(struct i915_request *request, u32 *cs) static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
{ {
/* Ensure there's always at least one preemption point per-request. */ /* Ensure there's always at least one preemption point per-request. */
*cs++ = MI_ARB_CHECK; *cs++ = MI_ARB_CHECK;
*cs++ = MI_NOOP; *cs++ = MI_NOOP;
request->wa_tail = intel_ring_offset(request, cs); request->wa_tail = intel_ring_offset(request, cs);
return cs;
} }
static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
{ {
/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
cs = gen8_emit_ggtt_write(cs, request->global_seqno, cs = gen8_emit_ggtt_write(cs,
request->fence.seqno,
request->timeline->hwsp_offset);
cs = gen8_emit_ggtt_write(cs,
request->global_seqno,
intel_hws_seqno_address(request->engine)); intel_hws_seqno_address(request->engine));
*cs++ = MI_USER_INTERRUPT; *cs++ = MI_USER_INTERRUPT;
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
request->tail = intel_ring_offset(request, cs); request->tail = intel_ring_offset(request, cs);
assert_ring_tail_valid(request->ring, request->tail); assert_ring_tail_valid(request->ring, request->tail);
gen8_emit_wa_tail(request, cs); return gen8_emit_wa_tail(request, cs);
} }
static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;
static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{ {
/* We're using qword write, seqno should be aligned to 8 bytes. */
BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);
cs = gen8_emit_ggtt_write_rcs(cs, cs = gen8_emit_ggtt_write_rcs(cs,
request->global_seqno, request->fence.seqno,
intel_hws_seqno_address(request->engine), request->timeline->hwsp_offset,
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_FLUSH_ENABLE |
PIPE_CONTROL_CS_STALL); PIPE_CONTROL_CS_STALL);
cs = gen8_emit_ggtt_write_rcs(cs,
request->global_seqno,
intel_hws_seqno_address(request->engine),
PIPE_CONTROL_CS_STALL);
*cs++ = MI_USER_INTERRUPT; *cs++ = MI_USER_INTERRUPT;
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
request->tail = intel_ring_offset(request, cs); request->tail = intel_ring_offset(request, cs);
assert_ring_tail_valid(request->ring, request->tail); assert_ring_tail_valid(request->ring, request->tail);
gen8_emit_wa_tail(request, cs); return gen8_emit_wa_tail(request, cs);
} }
static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS;
static int gen8_init_rcs_context(struct i915_request *rq) static int gen8_init_rcs_context(struct i915_request *rq)
{ {
@ -2173,8 +2284,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
engine->request_alloc = execlists_request_alloc; engine->request_alloc = execlists_request_alloc;
engine->emit_flush = gen8_emit_flush; engine->emit_flush = gen8_emit_flush;
engine->emit_breadcrumb = gen8_emit_breadcrumb; engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz; engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
engine->set_default_submission = intel_execlists_set_default_submission; engine->set_default_submission = intel_execlists_set_default_submission;
@ -2213,10 +2324,14 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
} }
static void static int
logical_ring_setup(struct intel_engine_cs *engine) logical_ring_setup(struct intel_engine_cs *engine)
{ {
intel_engine_setup_common(engine); int err;
err = intel_engine_setup_common(engine);
if (err)
return err;
/* Intentionally left blank. */ /* Intentionally left blank. */
engine->buffer = NULL; engine->buffer = NULL;
@ -2226,6 +2341,8 @@ logical_ring_setup(struct intel_engine_cs *engine)
logical_ring_default_vfuncs(engine); logical_ring_default_vfuncs(engine);
logical_ring_default_irqs(engine); logical_ring_default_irqs(engine);
return 0;
} }
static int logical_ring_init(struct intel_engine_cs *engine) static int logical_ring_init(struct intel_engine_cs *engine)
@ -2260,10 +2377,10 @@ static int logical_ring_init(struct intel_engine_cs *engine)
} }
execlists->csb_status = execlists->csb_status =
&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
execlists->csb_write = execlists->csb_write =
&engine->status_page.page_addr[intel_hws_csb_write_index(i915)]; &engine->status_page.addr[intel_hws_csb_write_index(i915)];
reset_csb_pointers(execlists); reset_csb_pointers(execlists);
@ -2274,13 +2391,14 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
{ {
int ret; int ret;
logical_ring_setup(engine); ret = logical_ring_setup(engine);
if (ret)
return ret;
/* Override some for render ring. */ /* Override some for render ring. */
engine->init_context = gen8_init_rcs_context; engine->init_context = gen8_init_rcs_context;
engine->emit_flush = gen8_emit_flush_render; engine->emit_flush = gen8_emit_flush_render;
engine->emit_breadcrumb = gen8_emit_breadcrumb_rcs; engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_rcs_sz;
ret = logical_ring_init(engine); ret = logical_ring_init(engine);
if (ret) if (ret)
@ -2304,7 +2422,11 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
int logical_xcs_ring_init(struct intel_engine_cs *engine) int logical_xcs_ring_init(struct intel_engine_cs *engine)
{ {
logical_ring_setup(engine); int err;
err = logical_ring_setup(engine);
if (err)
return err;
return logical_ring_init(engine); return logical_ring_init(engine);
} }
@ -2534,8 +2656,7 @@ static void execlists_init_reg_state(u32 *regs,
if (rcs) { if (rcs) {
regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
make_rpcs(dev_priv));
i915_oa_init_reg_state(engine, ctx, regs); i915_oa_init_reg_state(engine, ctx, regs);
} }
@ -2638,7 +2759,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
goto error_deref_obj; goto error_deref_obj;
} }
timeline = i915_timeline_create(ctx->i915, ctx->name); timeline = i915_timeline_create(ctx->i915, ctx->name, NULL);
if (IS_ERR(timeline)) { if (IS_ERR(timeline)) {
ret = PTR_ERR(timeline); ret = PTR_ERR(timeline);
goto error_deref_obj; goto error_deref_obj;
@ -2696,12 +2817,8 @@ void intel_lr_context_resume(struct drm_i915_private *i915)
intel_ring_reset(ce->ring, 0); intel_ring_reset(ce->ring, 0);
if (ce->pin_count) { /* otherwise done in context_pin */ if (ce->pin_count) /* otherwise done in context_pin */
u32 *regs = ce->lrc_reg_state; __execlists_update_reg_state(engine, ce);
regs[CTX_RING_HEAD + 1] = ce->ring->head;
regs[CTX_RING_TAIL + 1] = ce->ring->tail;
}
} }
} }
} }
@ -2740,7 +2857,9 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
last = NULL; last = NULL;
count = 0; count = 0;
drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority); if (execlists->queue_priority_hint != INT_MIN)
drm_printf(m, "\t\tQueue priority hint: %d\n",
execlists->queue_priority_hint);
for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
struct i915_priolist *p = rb_entry(rb, typeof(*p), node); struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
int i; int i;


@ -28,48 +28,60 @@
struct drm_i915_mocs_entry { struct drm_i915_mocs_entry {
u32 control_value; u32 control_value;
u16 l3cc_value; u16 l3cc_value;
u16 used;
}; };
struct drm_i915_mocs_table { struct drm_i915_mocs_table {
u32 size; unsigned int size;
unsigned int n_entries;
const struct drm_i915_mocs_entry *table; const struct drm_i915_mocs_entry *table;
}; };
/* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */
#define LE_CACHEABILITY(value) ((value) << 0) #define _LE_CACHEABILITY(value) ((value) << 0)
#define LE_TGT_CACHE(value) ((value) << 2) #define _LE_TGT_CACHE(value) ((value) << 2)
#define LE_LRUM(value) ((value) << 4) #define LE_LRUM(value) ((value) << 4)
#define LE_AOM(value) ((value) << 6) #define LE_AOM(value) ((value) << 6)
#define LE_RSC(value) ((value) << 7) #define LE_RSC(value) ((value) << 7)
#define LE_SCC(value) ((value) << 8) #define LE_SCC(value) ((value) << 8)
#define LE_PFM(value) ((value) << 11) #define LE_PFM(value) ((value) << 11)
#define LE_SCF(value) ((value) << 14) #define LE_SCF(value) ((value) << 14)
#define LE_COS(value) ((value) << 15)
#define LE_SSE(value) ((value) << 17)
/* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */ /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
#define L3_ESC(value) ((value) << 0) #define L3_ESC(value) ((value) << 0)
#define L3_SCC(value) ((value) << 1) #define L3_SCC(value) ((value) << 1)
#define L3_CACHEABILITY(value) ((value) << 4) #define _L3_CACHEABILITY(value) ((value) << 4)
/* Helper defines */ /* Helper defines */
#define GEN9_NUM_MOCS_ENTRIES 62 /* 62 out of 64 - 63 & 64 are reserved. */ #define GEN9_NUM_MOCS_ENTRIES 62 /* 62 out of 64 - 63 & 64 are reserved. */
#define GEN11_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */
/* (e)LLC caching options */ /* (e)LLC caching options */
#define LE_PAGETABLE 0 #define LE_0_PAGETABLE _LE_CACHEABILITY(0)
#define LE_UC 1 #define LE_1_UC _LE_CACHEABILITY(1)
#define LE_WT 2 #define LE_2_WT _LE_CACHEABILITY(2)
#define LE_WB 3 #define LE_3_WB _LE_CACHEABILITY(3)
/* L3 caching options */
#define L3_DIRECT 0
#define L3_UC 1
#define L3_RESERVED 2
#define L3_WB 3
/* Target cache */ /* Target cache */
#define LE_TC_PAGETABLE 0 #define LE_TC_0_PAGETABLE _LE_TGT_CACHE(0)
#define LE_TC_LLC 1 #define LE_TC_1_LLC _LE_TGT_CACHE(1)
#define LE_TC_LLC_ELLC 2 #define LE_TC_2_LLC_ELLC _LE_TGT_CACHE(2)
#define LE_TC_LLC_ELLC_ALT 3 #define LE_TC_3_LLC_ELLC_ALT _LE_TGT_CACHE(3)
/* L3 caching options */
#define L3_0_DIRECT _L3_CACHEABILITY(0)
#define L3_1_UC _L3_CACHEABILITY(1)
#define L3_2_RESERVED _L3_CACHEABILITY(2)
#define L3_3_WB _L3_CACHEABILITY(3)
#define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
[__idx] = { \
.control_value = __control_value, \
.l3cc_value = __l3cc_value, \
.used = 1, \
}
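As a reading aid, here is a minimal, self-contained sketch (plain userspace C with stand-in types and arbitrary numeric values, not the kernel structures or real register encodings) of what the MOCS_ENTRY() designated-initializer pattern above builds: a sparse table whose unpopulated slots keep used == 0, so a lookup can fall back to the I915_MOCS_PTE entry, mirroring the get_entry_control()/get_entry_l3cc() helpers added later in this file.

#include <stdint.h>
#include <stdio.h>

struct mocs_entry {
        uint32_t control_value;
        uint16_t l3cc_value;
        uint16_t used;
};

#define MOCS_PTE 1 /* stand-in for I915_MOCS_PTE */

#define MOCS_ENTRY(idx, ctrl, l3cc) \
        [idx] = { .control_value = (ctrl), .l3cc_value = (l3cc), .used = 1 }

/* Sparse table: only indices 0, 1 and 5 are populated; the rest stay zeroed. */
static const struct mocs_entry table[8] = {
        MOCS_ENTRY(0, 0x0000000b, 0x0010),
        MOCS_ENTRY(MOCS_PTE, 0x00000038, 0x0030),
        MOCS_ENTRY(5, 0x0000003b, 0x0030),
};

/* Unused slots resolve to the PTE entry instead of returning stale zeroes. */
static uint32_t entry_control(unsigned int idx)
{
        return table[idx].used ? table[idx].control_value
                               : table[MOCS_PTE].control_value;
}

int main(void)
{
        for (unsigned int i = 0; i < 8; i++)
                printf("MOCS[%u] = 0x%08x%s\n", i, entry_control(i),
                       table[i].used ? "" : " (PTE fallback)");
        return 0;
}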
/* /*
* MOCS tables * MOCS tables
@ -80,85 +92,147 @@ struct drm_i915_mocs_table {
* LNCFCMOCS0 - LNCFCMOCS32 registers. * LNCFCMOCS0 - LNCFCMOCS32 registers.
* *
* These tables are intended to be kept reasonably consistent across * These tables are intended to be kept reasonably consistent across
* platforms. However some of the fields are not applicable to all of * HW platforms, and for ICL+, be identical across OSes. To achieve
* them. * that, for Icelake and above, the list of entries is published as part
* of the bspec.
* *
* Entries not part of the following tables are undefined as far as * Entries not part of the following tables are undefined as far as
* userspace is concerned and shouldn't be relied upon. For the time * userspace is concerned and shouldn't be relied upon. For the time
* being they will be implicitly initialized to the strictest caching * being they will be initialized to PTE.
* configuration (uncached) to guarantee forwards compatibility with
* userspace programs written against more recent kernels providing
* additional MOCS entries.
* *
* NOTE: These tables MUST start with being uncached and the length * The last two entries are reserved by the hardware. For ICL+ they
* MUST be less than 63 as the last two registers are reserved * should be initialized according to bspec and never used, for older
* by the hardware. These tables are part of the kernel ABI and * platforms they should never be written to.
* may only be updated incrementally by adding entries at the *
* end. * NOTE: These tables are part of bspec and defined as part of hardware
* interface for ICL+. For older platforms, they are part of kernel
* ABI. It is expected that, for specific hardware platform, existing
* entries will remain constant and the table will only be updated by
* adding new entries, filling unused positions.
*/ */
static const struct drm_i915_mocs_entry skylake_mocs_table[] = { #define GEN9_MOCS_ENTRIES \
[I915_MOCS_UNCACHED] = { MOCS_ENTRY(I915_MOCS_UNCACHED, \
/* 0x00000009 */ LE_1_UC | LE_TC_2_LLC_ELLC, \
.control_value = LE_CACHEABILITY(LE_UC) | L3_1_UC), \
LE_TGT_CACHE(LE_TC_LLC_ELLC) | MOCS_ENTRY(I915_MOCS_PTE, \
LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \
LE_PFM(0) | LE_SCF(0), L3_3_WB)
/* 0x0010 */ static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
.l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), GEN9_MOCS_ENTRIES,
}, MOCS_ENTRY(I915_MOCS_CACHED,
[I915_MOCS_PTE] = { LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3),
/* 0x00000038 */ L3_3_WB)
.control_value = LE_CACHEABILITY(LE_PAGETABLE) |
LE_TGT_CACHE(LE_TC_LLC_ELLC) |
LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
LE_PFM(0) | LE_SCF(0),
/* 0x0030 */
.l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
},
[I915_MOCS_CACHED] = {
/* 0x0000003b */
.control_value = LE_CACHEABILITY(LE_WB) |
LE_TGT_CACHE(LE_TC_LLC_ELLC) |
LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
LE_PFM(0) | LE_SCF(0),
/* 0x0030 */
.l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
},
}; };
/* NOTE: the LE_TGT_CACHE is not used on Broxton */ /* NOTE: the LE_TGT_CACHE is not used on Broxton */
static const struct drm_i915_mocs_entry broxton_mocs_table[] = { static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
[I915_MOCS_UNCACHED] = { GEN9_MOCS_ENTRIES,
/* 0x00000009 */ MOCS_ENTRY(I915_MOCS_CACHED,
.control_value = LE_CACHEABILITY(LE_UC) | LE_1_UC | LE_TC_2_LLC_ELLC | LE_LRUM(3),
LE_TGT_CACHE(LE_TC_LLC_ELLC) | L3_3_WB)
LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | };
LE_PFM(0) | LE_SCF(0),
/* 0x0010 */ #define GEN11_MOCS_ENTRIES \
.l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), /* Base - Uncached (Deprecated) */ \
}, MOCS_ENTRY(I915_MOCS_UNCACHED, \
[I915_MOCS_PTE] = { LE_1_UC | LE_TC_1_LLC, \
/* 0x00000038 */ L3_1_UC), \
.control_value = LE_CACHEABILITY(LE_PAGETABLE) | /* Base - L3 + LeCC:PAT (Deprecated) */ \
LE_TGT_CACHE(LE_TC_LLC_ELLC) | MOCS_ENTRY(I915_MOCS_PTE, \
LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_0_PAGETABLE | LE_TC_1_LLC, \
LE_PFM(0) | LE_SCF(0), L3_3_WB), \
/* Base - L3 + LLC */ \
MOCS_ENTRY(2, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
L3_3_WB), \
/* Base - Uncached */ \
MOCS_ENTRY(3, \
LE_1_UC | LE_TC_1_LLC, \
L3_1_UC), \
/* Base - L3 */ \
MOCS_ENTRY(4, \
LE_1_UC | LE_TC_1_LLC, \
L3_3_WB), \
/* Base - LLC */ \
MOCS_ENTRY(5, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
L3_1_UC), \
/* Age 0 - LLC */ \
MOCS_ENTRY(6, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
L3_1_UC), \
/* Age 0 - L3 + LLC */ \
MOCS_ENTRY(7, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
L3_3_WB), \
/* Age: Don't Chg. - LLC */ \
MOCS_ENTRY(8, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
L3_1_UC), \
/* Age: Don't Chg. - L3 + LLC */ \
MOCS_ENTRY(9, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
L3_3_WB), \
/* No AOM - LLC */ \
MOCS_ENTRY(10, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
L3_1_UC), \
/* No AOM - L3 + LLC */ \
MOCS_ENTRY(11, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
L3_3_WB), \
/* No AOM; Age 0 - LLC */ \
MOCS_ENTRY(12, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
L3_1_UC), \
/* No AOM; Age 0 - L3 + LLC */ \
MOCS_ENTRY(13, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
L3_3_WB), \
/* No AOM; Age:DC - LLC */ \
MOCS_ENTRY(14, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
L3_1_UC), \
/* No AOM; Age:DC - L3 + LLC */ \
MOCS_ENTRY(15, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
L3_3_WB), \
/* Self-Snoop - L3 + LLC */ \
MOCS_ENTRY(18, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \
L3_3_WB), \
/* Skip Caching - L3 + LLC(12.5%) */ \
MOCS_ENTRY(19, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \
L3_3_WB), \
/* Skip Caching - L3 + LLC(25%) */ \
MOCS_ENTRY(20, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \
L3_3_WB), \
/* Skip Caching - L3 + LLC(50%) */ \
MOCS_ENTRY(21, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \
L3_3_WB), \
/* Skip Caching - L3 + LLC(75%) */ \
MOCS_ENTRY(22, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \
L3_3_WB), \
/* Skip Caching - L3 + LLC(87.5%) */ \
MOCS_ENTRY(23, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \
L3_3_WB), \
/* HW Reserved - SW program but never use */ \
MOCS_ENTRY(62, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
L3_1_UC), \
/* HW Reserved - SW program but never use */ \
MOCS_ENTRY(63, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
L3_1_UC)
/* 0x0030 */ static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
.l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), GEN11_MOCS_ENTRIES
},
[I915_MOCS_CACHED] = {
/* 0x00000039 */
.control_value = LE_CACHEABILITY(LE_UC) |
LE_TGT_CACHE(LE_TC_LLC_ELLC) |
LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
LE_PFM(0) | LE_SCF(0),
/* 0x0030 */
.l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
},
}; };
/** /**
@ -178,13 +252,19 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv,
{ {
bool result = false; bool result = false;
if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) || if (IS_ICELAKE(dev_priv)) {
IS_ICELAKE(dev_priv)) { table->size = ARRAY_SIZE(icelake_mocs_table);
table->table = icelake_mocs_table;
table->n_entries = GEN11_NUM_MOCS_ENTRIES;
result = true;
} else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
table->size = ARRAY_SIZE(skylake_mocs_table); table->size = ARRAY_SIZE(skylake_mocs_table);
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
table->table = skylake_mocs_table; table->table = skylake_mocs_table;
result = true; result = true;
} else if (IS_GEN9_LP(dev_priv)) { } else if (IS_GEN9_LP(dev_priv)) {
table->size = ARRAY_SIZE(broxton_mocs_table); table->size = ARRAY_SIZE(broxton_mocs_table);
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
table->table = broxton_mocs_table; table->table = broxton_mocs_table;
result = true; result = true;
} else { } else {
@ -226,6 +306,19 @@ static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index)
} }
} }
/*
* Get control_value from MOCS entry taking into account when it's not used:
* I915_MOCS_PTE's value is returned in this case.
*/
static u32 get_entry_control(const struct drm_i915_mocs_table *table,
unsigned int index)
{
if (table->table[index].used)
return table->table[index].control_value;
return table->table[I915_MOCS_PTE].control_value;
}
/** /**
* intel_mocs_init_engine() - emit the mocs control table * intel_mocs_init_engine() - emit the mocs control table
* @engine: The engine for whom to emit the registers. * @engine: The engine for whom to emit the registers.
@ -238,27 +331,23 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
struct drm_i915_private *dev_priv = engine->i915; struct drm_i915_private *dev_priv = engine->i915;
struct drm_i915_mocs_table table; struct drm_i915_mocs_table table;
unsigned int index; unsigned int index;
u32 unused_value;
if (!get_mocs_settings(dev_priv, &table)) if (!get_mocs_settings(dev_priv, &table))
return; return;
GEM_BUG_ON(table.size > GEN9_NUM_MOCS_ENTRIES); /* Set unused values to PTE */
unused_value = table.table[I915_MOCS_PTE].control_value;
for (index = 0; index < table.size; index++) for (index = 0; index < table.size; index++) {
I915_WRITE(mocs_register(engine->id, index), u32 value = get_entry_control(&table, index);
table.table[index].control_value);
/* I915_WRITE(mocs_register(engine->id, index), value);
* Ok, now set the unused entries to uncached. These entries }
* are officially undefined and no contract for the contents
* and settings is given for these entries. /* All remaining entries are also unused */
* for (; index < table.n_entries; index++)
* Entry 0 in the table is uncached - so we are just writing I915_WRITE(mocs_register(engine->id, index), unused_value);
* that value to all the used entries.
*/
for (; index < GEN9_NUM_MOCS_ENTRIES; index++)
I915_WRITE(mocs_register(engine->id, index),
table.table[0].control_value);
} }
/** /**
@ -276,33 +365,32 @@ static int emit_mocs_control_table(struct i915_request *rq,
{ {
enum intel_engine_id engine = rq->engine->id; enum intel_engine_id engine = rq->engine->id;
unsigned int index; unsigned int index;
u32 unused_value;
u32 *cs; u32 *cs;
if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) if (GEM_WARN_ON(table->size > table->n_entries))
return -ENODEV; return -ENODEV;
cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); /* Set unused values to PTE */
unused_value = table->table[I915_MOCS_PTE].control_value;
cs = intel_ring_begin(rq, 2 + 2 * table->n_entries);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
*cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES); *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);
for (index = 0; index < table->size; index++) { for (index = 0; index < table->size; index++) {
u32 value = get_entry_control(table, index);
*cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
*cs++ = table->table[index].control_value; *cs++ = value;
} }
/* /* All remaining entries are also unused */
* Ok, now set the unused entries to uncached. These entries for (; index < table->n_entries; index++) {
* are officially undefined and no contract for the contents
* and settings is given for these entries.
*
* Entry 0 in the table is uncached - so we are just writing
* that value to all the used entries.
*/
for (; index < GEN9_NUM_MOCS_ENTRIES; index++) {
*cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
*cs++ = table->table[0].control_value; *cs++ = unused_value;
} }
*cs++ = MI_NOOP; *cs++ = MI_NOOP;
@ -311,12 +399,24 @@ static int emit_mocs_control_table(struct i915_request *rq,
return 0; return 0;
} }
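For readers unfamiliar with the command-stream pattern used by emit_mocs_control_table() above: a single MI_LOAD_REGISTER_IMM header is followed by one (register offset, immediate value) pair per MOCS entry, then an MI_NOOP so the total dword count stays even. The standalone sketch below shows only that layout in plain C; the opcode encodings and register base are placeholders rather than the hardware values, and the used/PTE fallback handled by the real code is omitted.

#include <stdint.h>
#include <stdio.h>

/* Placeholder encodings, illustrative only. */
#define MI_LOAD_REGISTER_IMM(n) (0x22000000u | (n))
#define MI_NOOP                 0x00000000u

struct mocs_entry { uint32_t control_value; };

/* Emit an LRI header, then one (offset, value) pair per entry. */
static uint32_t *emit_mocs(uint32_t *cs, uint32_t reg_base,
                           const struct mocs_entry *table, unsigned int n)
{
        *cs++ = MI_LOAD_REGISTER_IMM(n);
        for (unsigned int i = 0; i < n; i++) {
                *cs++ = reg_base + 4 * i;       /* register offset */
                *cs++ = table[i].control_value; /* immediate value */
        }
        *cs++ = MI_NOOP;                        /* pad to an even dword count */
        return cs;
}

int main(void)
{
        static const struct mocs_entry table[3] = {
                { 0x0000000b }, { 0x00000038 }, { 0x0000003b },
        };
        uint32_t buf[16];
        const uint32_t *end = emit_mocs(buf, 0xc800 /* placeholder base */,
                                        table, 3);

        for (const uint32_t *p = buf; p < end; p++)
                printf("0x%08x\n", *p);
        return 0;
}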
/*
* Get l3cc_value from MOCS entry taking into account when it's not used:
* I915_MOCS_PTE's value is returned in this case.
*/
static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table,
unsigned int index)
{
if (table->table[index].used)
return table->table[index].l3cc_value;
return table->table[I915_MOCS_PTE].l3cc_value;
}
static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
u16 low, u16 low,
u16 high) u16 high)
{ {
return table->table[low].l3cc_value | return low | high << 16;
table->table[high].l3cc_value << 16;
} }
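Since each LNCFCMOCS register carries two 16-bit l3cc fields, l3cc_combine() above simply places entry 2*i in the low half and entry 2*i + 1 in the high half of the 32-bit write. A tiny self-contained sketch with arbitrary example values:

#include <stdint.h>
#include <stdio.h>

/* Pack two 16-bit l3cc values into one 32-bit LNCFCMOCS register image. */
static uint32_t l3cc_pack(uint16_t low, uint16_t high)
{
        return (uint32_t)low | (uint32_t)high << 16;
}

int main(void)
{
        /* 0x0010 (entry 2*i) and 0x0030 (entry 2*i + 1) give 0x00300010. */
        printf("0x%08x\n", l3cc_pack(0x0010, 0x0030));
        return 0;
}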
/** /**
@ -333,38 +433,43 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
static int emit_mocs_l3cc_table(struct i915_request *rq, static int emit_mocs_l3cc_table(struct i915_request *rq,
const struct drm_i915_mocs_table *table) const struct drm_i915_mocs_table *table)
{ {
u16 unused_value;
unsigned int i; unsigned int i;
u32 *cs; u32 *cs;
if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) if (GEM_WARN_ON(table->size > table->n_entries))
return -ENODEV; return -ENODEV;
cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES); /* Set unused values to PTE */
unused_value = table->table[I915_MOCS_PTE].l3cc_value;
cs = intel_ring_begin(rq, 2 + table->n_entries);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
*cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2); *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2);
for (i = 0; i < table->size / 2; i++) { for (i = 0; i < table->size / 2; i++) {
u16 low = get_entry_l3cc(table, 2 * i);
u16 high = get_entry_l3cc(table, 2 * i + 1);
*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
*cs++ = l3cc_combine(table, 2 * i, 2 * i + 1); *cs++ = l3cc_combine(table, low, high);
} }
if (table->size & 0x01) {
/* Odd table size - 1 left over */ /* Odd table size - 1 left over */
if (table->size & 0x01) {
u16 low = get_entry_l3cc(table, 2 * i);
*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
*cs++ = l3cc_combine(table, 2 * i, 0); *cs++ = l3cc_combine(table, low, unused_value);
i++; i++;
} }
/* /* All remaining entries are also unused */
* Now set the rest of the table to uncached - use entry 0 as for (; i < table->n_entries / 2; i++) {
* this will be uncached. Leave the last pair uninitialised as
* they are reserved by the hardware.
*/
for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) {
*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
*cs++ = l3cc_combine(table, 0, 0); *cs++ = l3cc_combine(table, unused_value, unused_value);
} }
*cs++ = MI_NOOP; *cs++ = MI_NOOP;
@ -391,26 +496,35 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv)
{ {
struct drm_i915_mocs_table table; struct drm_i915_mocs_table table;
unsigned int i; unsigned int i;
u16 unused_value;
if (!get_mocs_settings(dev_priv, &table)) if (!get_mocs_settings(dev_priv, &table))
return; return;
for (i = 0; i < table.size/2; i++) /* Set unused values to PTE */
I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 2*i, 2*i+1)); unused_value = table.table[I915_MOCS_PTE].l3cc_value;
for (i = 0; i < table.size / 2; i++) {
u16 low = get_entry_l3cc(&table, 2 * i);
u16 high = get_entry_l3cc(&table, 2 * i + 1);
I915_WRITE(GEN9_LNCFCMOCS(i),
l3cc_combine(&table, low, high));
}
/* Odd table size - 1 left over */ /* Odd table size - 1 left over */
if (table.size & 0x01) { if (table.size & 0x01) {
I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 2*i, 0)); u16 low = get_entry_l3cc(&table, 2 * i);
I915_WRITE(GEN9_LNCFCMOCS(i),
l3cc_combine(&table, low, unused_value));
i++; i++;
} }
/* /* All remaining entries are also unused */
* Now set the rest of the table to uncached - use entry 0 as for (; i < table.n_entries / 2; i++)
* this will be uncached. Leave the last pair as initialised as I915_WRITE(GEN9_LNCFCMOCS(i),
* they are reserved by the hardware. l3cc_combine(&table, unused_value, unused_value));
*/
for (; i < (GEN9_NUM_MOCS_ENTRIES / 2); i++)
I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0));
} }
/** /**


@ -480,8 +480,6 @@ void intel_overlay_reset(struct drm_i915_private *dev_priv)
if (!overlay) if (!overlay)
return; return;
intel_overlay_release_old_vid(overlay);
overlay->old_xscale = 0; overlay->old_xscale = 0;
overlay->old_yscale = 0; overlay->old_yscale = 0;
overlay->crtc = NULL; overlay->crtc = NULL;


@ -3631,14 +3631,9 @@ static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv)
* FIXME: We still don't have the proper code to detect if we need to apply the WA, * FIXME: We still don't have the proper code to detect if we need to apply the WA,
* so assume we'll always need it in order to avoid underruns. * so assume we'll always need it in order to avoid underruns.
*/ */
static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) static bool skl_needs_memory_bw_wa(struct drm_i915_private *dev_priv)
{ {
struct drm_i915_private *dev_priv = to_i915(state->base.dev); return IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv);
if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv))
return true;
return false;
} }
static bool static bool
@ -3670,25 +3665,25 @@ intel_enable_sagv(struct drm_i915_private *dev_priv)
if (dev_priv->sagv_status == I915_SAGV_ENABLED) if (dev_priv->sagv_status == I915_SAGV_ENABLED)
return 0; return 0;
DRM_DEBUG_KMS("Enabling the SAGV\n"); DRM_DEBUG_KMS("Enabling SAGV\n");
mutex_lock(&dev_priv->pcu_lock); mutex_lock(&dev_priv->pcu_lock);
ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL, ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
GEN9_SAGV_ENABLE); GEN9_SAGV_ENABLE);
/* We don't need to wait for the SAGV when enabling */ /* We don't need to wait for SAGV when enabling */
mutex_unlock(&dev_priv->pcu_lock); mutex_unlock(&dev_priv->pcu_lock);
/* /*
* Some skl systems, pre-release machines in particular, * Some skl systems, pre-release machines in particular,
* don't actually have an SAGV. * don't actually have SAGV.
*/ */
if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) { if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
return 0; return 0;
} else if (ret < 0) { } else if (ret < 0) {
DRM_ERROR("Failed to enable the SAGV\n"); DRM_ERROR("Failed to enable SAGV\n");
return ret; return ret;
} }
@ -3707,7 +3702,7 @@ intel_disable_sagv(struct drm_i915_private *dev_priv)
if (dev_priv->sagv_status == I915_SAGV_DISABLED) if (dev_priv->sagv_status == I915_SAGV_DISABLED)
return 0; return 0;
DRM_DEBUG_KMS("Disabling the SAGV\n"); DRM_DEBUG_KMS("Disabling SAGV\n");
mutex_lock(&dev_priv->pcu_lock); mutex_lock(&dev_priv->pcu_lock);
/* bspec says to keep retrying for at least 1 ms */ /* bspec says to keep retrying for at least 1 ms */
@ -3719,14 +3714,14 @@ intel_disable_sagv(struct drm_i915_private *dev_priv)
/* /*
* Some skl systems, pre-release machines in particular, * Some skl systems, pre-release machines in particular,
* don't actually have an SAGV. * don't actually have SAGV.
*/ */
if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) { if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
return 0; return 0;
} else if (ret < 0) { } else if (ret < 0) {
DRM_ERROR("Failed to disable the SAGV (%d)\n", ret); DRM_ERROR("Failed to disable SAGV (%d)\n", ret);
return ret; return ret;
} }
@ -3757,7 +3752,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state)
sagv_block_time_us = 10; sagv_block_time_us = 10;
/* /*
* SKL+ workaround: bspec recommends we disable the SAGV when we have * SKL+ workaround: bspec recommends we disable SAGV when we have
* more than one pipe enabled * more than one pipe enabled
* *
* If there are no active CRTCs, no additional checks need be performed * If there are no active CRTCs, no additional checks need be performed
@ -3790,7 +3785,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state)
latency = dev_priv->wm.skl_latency[level]; latency = dev_priv->wm.skl_latency[level];
if (skl_needs_memory_bw_wa(intel_state) && if (skl_needs_memory_bw_wa(dev_priv) &&
plane->base.state->fb->modifier == plane->base.state->fb->modifier ==
I915_FORMAT_MOD_X_TILED) I915_FORMAT_MOD_X_TILED)
latency += 15; latency += 15;
@ -3798,7 +3793,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state)
/* /*
* If any of the planes on this pipe don't enable wm levels that * If any of the planes on this pipe don't enable wm levels that
* incur memory latencies higher than sagv_block_time_us we * incur memory latencies higher than sagv_block_time_us we
* can't enable the SAGV. * can't enable SAGV.
*/ */
if (latency < sagv_block_time_us) if (latency < sagv_block_time_us)
return false; return false;
@ -3827,8 +3822,13 @@ static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv,
/* /*
* 12GB/s is maximum BW supported by single DBuf slice. * 12GB/s is maximum BW supported by single DBuf slice.
*
* FIXME dbuf slice code is broken:
* - must wait for planes to stop using the slice before powering it off
* - plane straddling both slices is illegal in multi-pipe scenarios
* - should validate we stay within the hw bandwidth limits
*/ */
if (num_active > 1 || total_data_bw >= GBps(12)) { if (0 && (num_active > 1 || total_data_bw >= GBps(12))) {
ddb->enabled_slices = 2; ddb->enabled_slices = 2;
} else { } else {
ddb->enabled_slices = 1; ddb->enabled_slices = 1;
@ -4371,8 +4371,8 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
continue; continue;
wm = &cstate->wm.skl.optimal.planes[plane_id]; wm = &cstate->wm.skl.optimal.planes[plane_id];
blocks += wm->wm[level].plane_res_b; blocks += wm->wm[level].min_ddb_alloc;
blocks += wm->uv_wm[level].plane_res_b; blocks += wm->uv_wm[level].min_ddb_alloc;
} }
if (blocks < alloc_size) { if (blocks < alloc_size) {
@ -4413,7 +4413,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
extra = min_t(u16, alloc_size, extra = min_t(u16, alloc_size,
DIV64_U64_ROUND_UP(alloc_size * rate, DIV64_U64_ROUND_UP(alloc_size * rate,
total_data_rate)); total_data_rate));
total[plane_id] = wm->wm[level].plane_res_b + extra; total[plane_id] = wm->wm[level].min_ddb_alloc + extra;
alloc_size -= extra; alloc_size -= extra;
total_data_rate -= rate; total_data_rate -= rate;
@ -4424,7 +4424,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
extra = min_t(u16, alloc_size, extra = min_t(u16, alloc_size,
DIV64_U64_ROUND_UP(alloc_size * rate, DIV64_U64_ROUND_UP(alloc_size * rate,
total_data_rate)); total_data_rate));
uv_total[plane_id] = wm->uv_wm[level].plane_res_b + extra; uv_total[plane_id] = wm->uv_wm[level].min_ddb_alloc + extra;
alloc_size -= extra; alloc_size -= extra;
total_data_rate -= rate; total_data_rate -= rate;
} }
@ -4477,7 +4477,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
*/ */
for_each_plane_id_on_crtc(intel_crtc, plane_id) { for_each_plane_id_on_crtc(intel_crtc, plane_id) {
wm = &cstate->wm.skl.optimal.planes[plane_id]; wm = &cstate->wm.skl.optimal.planes[plane_id];
if (wm->trans_wm.plane_res_b > total[plane_id]) if (wm->trans_wm.plane_res_b >= total[plane_id])
memset(&wm->trans_wm, 0, sizeof(wm->trans_wm)); memset(&wm->trans_wm, 0, sizeof(wm->trans_wm));
} }
@ -4579,9 +4579,6 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate,
const struct drm_plane_state *pstate = &intel_pstate->base; const struct drm_plane_state *pstate = &intel_pstate->base;
const struct drm_framebuffer *fb = pstate->fb; const struct drm_framebuffer *fb = pstate->fb;
u32 interm_pbpl; u32 interm_pbpl;
struct intel_atomic_state *state =
to_intel_atomic_state(cstate->base.state);
bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
/* only NV12 format has two planes */ /* only NV12 format has two planes */
if (color_plane == 1 && fb->format->format != DRM_FORMAT_NV12) { if (color_plane == 1 && fb->format->format != DRM_FORMAT_NV12) {
@ -4617,7 +4614,7 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate,
intel_pstate); intel_pstate);
if (INTEL_GEN(dev_priv) >= 11 && if (INTEL_GEN(dev_priv) >= 11 &&
fb->modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 8) fb->modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 1)
wp->dbuf_block_size = 256; wp->dbuf_block_size = 256;
else else
wp->dbuf_block_size = 512; wp->dbuf_block_size = 512;
@ -4642,7 +4639,7 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate,
wp->y_min_scanlines = 4; wp->y_min_scanlines = 4;
} }
if (apply_memory_bw_wa) if (skl_needs_memory_bw_wa(dev_priv))
wp->y_min_scanlines *= 2; wp->y_min_scanlines *= 2;
wp->plane_bytes_per_line = wp->width * wp->cpp; wp->plane_bytes_per_line = wp->width * wp->cpp;
@ -4674,6 +4671,15 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate,
return 0; return 0;
} }
static bool skl_wm_has_lines(struct drm_i915_private *dev_priv, int level)
{
if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
return true;
/* The number of lines are ignored for the level 0 watermark. */
return level > 0;
}
static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,
const struct intel_plane_state *intel_pstate, const struct intel_plane_state *intel_pstate,
int level, int level,
@ -4686,10 +4692,10 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,
u32 latency = dev_priv->wm.skl_latency[level]; u32 latency = dev_priv->wm.skl_latency[level];
uint_fixed_16_16_t method1, method2; uint_fixed_16_16_t method1, method2;
uint_fixed_16_16_t selected_result; uint_fixed_16_16_t selected_result;
u32 res_blocks, res_lines; u32 res_blocks, res_lines, min_ddb_alloc = 0;
struct intel_atomic_state *state =
to_intel_atomic_state(cstate->base.state); if (latency == 0)
bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); return;
/* Display WA #1141: kbl,cfl */ /* Display WA #1141: kbl,cfl */
if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) || if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) ||
@ -4697,7 +4703,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,
dev_priv->ipc_enabled) dev_priv->ipc_enabled)
latency += 4; latency += 4;
if (apply_memory_bw_wa && wp->x_tiled) if (skl_needs_memory_bw_wa(dev_priv) && wp->x_tiled)
latency += 15; latency += 15;
method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate, method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate,
@ -4756,8 +4762,28 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,
} }
} }
/* The number of lines are ignored for the level 0 watermark. */ if (INTEL_GEN(dev_priv) >= 11) {
if (level > 0 && res_lines > 31) if (wp->y_tiled) {
int extra_lines;
if (res_lines % wp->y_min_scanlines == 0)
extra_lines = wp->y_min_scanlines;
else
extra_lines = wp->y_min_scanlines * 2 -
res_lines % wp->y_min_scanlines;
min_ddb_alloc = mul_round_up_u32_fixed16(res_lines + extra_lines,
wp->plane_blocks_per_line);
} else {
min_ddb_alloc = res_blocks +
DIV_ROUND_UP(res_blocks, 10);
}
}
if (!skl_wm_has_lines(dev_priv, level))
res_lines = 0;
if (res_lines > 31)
return; return;
/* /*
@ -4768,6 +4794,8 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,
*/ */
result->plane_res_b = res_blocks; result->plane_res_b = res_blocks;
result->plane_res_l = res_lines; result->plane_res_l = res_lines;
/* Bspec says: value >= plane ddb allocation -> invalid, hence the +1 here */
result->min_ddb_alloc = max(min_ddb_alloc, res_blocks) + 1;
result->plane_en = true; result->plane_en = true;
} }
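To make the new gen11+ minimum DDB allocation rule concrete, here is a small standalone sketch of the arithmetic above. It is deliberately simplified: plane_blocks_per_line is treated as a plain integer instead of a 16.16 fixed-point value, and the numbers in main() are invented for illustration.

#include <stdio.h>

#define DIV_ROUND_UP(x, y) (((x) + (y) - 1) / (y))

static unsigned int min_ddb_alloc(int y_tiled, unsigned int res_blocks,
                                  unsigned int res_lines,
                                  unsigned int y_min_scanlines,
                                  unsigned int blocks_per_line)
{
        unsigned int min_alloc;

        if (y_tiled) {
                unsigned int extra_lines;

                if (res_lines % y_min_scanlines == 0)
                        extra_lines = y_min_scanlines;
                else
                        extra_lines = y_min_scanlines * 2 -
                                      res_lines % y_min_scanlines;

                min_alloc = (res_lines + extra_lines) * blocks_per_line;
        } else {
                /* Linear/X-tiled: pad the block count by roughly 10%. */
                min_alloc = res_blocks + DIV_ROUND_UP(res_blocks, 10);
        }

        /* "value >= plane ddb allocation -> invalid", hence the +1. */
        return (min_alloc > res_blocks ? min_alloc : res_blocks) + 1;
}

int main(void)
{
        /* Y-tiled: res_lines = 7, y_min_scanlines = 4, 8 blocks per line:
         * extra_lines = 4 * 2 - 7 % 4 = 5, so (7 + 5) * 8 = 96, reported as 97. */
        printf("%u\n", min_ddb_alloc(1, 40, 7, 4, 8));
        /* Not Y-tiled: 40 blocks pad to 40 + DIV_ROUND_UP(40, 10) = 44, reported as 45. */
        printf("%u\n", min_ddb_alloc(0, 40, 7, 4, 8));
        return 0;
}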
@ -4801,15 +4829,10 @@ skl_compute_linetime_wm(const struct intel_crtc_state *cstate)
u32 linetime_wm; u32 linetime_wm;
linetime_us = intel_get_linetime_us(cstate); linetime_us = intel_get_linetime_us(cstate);
if (is_fixed16_zero(linetime_us))
return 0;
linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us)); linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us));
/* Display WA #1135: bxt:ALL GLK:ALL */ /* Display WA #1135: BXT:ALL GLK:ALL */
if ((IS_BROXTON(dev_priv) || IS_GEMINILAKE(dev_priv)) && if (IS_GEN9_LP(dev_priv) && dev_priv->ipc_enabled)
dev_priv->ipc_enabled)
linetime_wm /= 2; linetime_wm /= 2;
return linetime_wm; return linetime_wm;
@ -5118,6 +5141,23 @@ static bool skl_plane_wm_equals(struct drm_i915_private *dev_priv,
return skl_wm_level_equals(&wm1->trans_wm, &wm2->trans_wm); return skl_wm_level_equals(&wm1->trans_wm, &wm2->trans_wm);
} }
static bool skl_pipe_wm_equals(struct intel_crtc *crtc,
const struct skl_pipe_wm *wm1,
const struct skl_pipe_wm *wm2)
{
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
enum plane_id plane_id;
for_each_plane_id_on_crtc(crtc, plane_id) {
if (!skl_plane_wm_equals(dev_priv,
&wm1->planes[plane_id],
&wm2->planes[plane_id]))
return false;
}
return wm1->linetime == wm2->linetime;
}
static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a, static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a,
const struct skl_ddb_entry *b) const struct skl_ddb_entry *b)
{ {
@ -5144,16 +5184,14 @@ static int skl_update_pipe_wm(struct intel_crtc_state *cstate,
struct skl_pipe_wm *pipe_wm, /* out */ struct skl_pipe_wm *pipe_wm, /* out */
bool *changed /* out */) bool *changed /* out */)
{ {
struct intel_crtc *crtc = to_intel_crtc(cstate->base.crtc);
int ret; int ret;
ret = skl_build_pipe_wm(cstate, pipe_wm); ret = skl_build_pipe_wm(cstate, pipe_wm);
if (ret) if (ret)
return ret; return ret;
if (!memcmp(old_pipe_wm, pipe_wm, sizeof(*pipe_wm))) *changed = !skl_pipe_wm_equals(crtc, old_pipe_wm, pipe_wm);
*changed = false;
else
*changed = true;
return 0; return 0;
} }


@ -33,6 +33,7 @@
#include "i915_drv.h" #include "i915_drv.h"
#include "i915_gem_render_state.h" #include "i915_gem_render_state.h"
#include "i915_reset.h"
#include "i915_trace.h" #include "i915_trace.h"
#include "intel_drv.h" #include "intel_drv.h"
#include "intel_workarounds.h" #include "intel_workarounds.h"
@ -42,6 +43,12 @@
*/ */
#define LEGACY_REQUEST_SIZE 200 #define LEGACY_REQUEST_SIZE 200
static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
{
return (i915_ggtt_offset(engine->status_page.vma) +
I915_GEM_HWS_INDEX_ADDR);
}
static unsigned int __intel_ring_space(unsigned int head, static unsigned int __intel_ring_space(unsigned int head,
unsigned int tail, unsigned int tail,
unsigned int size) unsigned int size)
@ -299,7 +306,7 @@ gen6_render_ring_flush(struct i915_request *rq, u32 mode)
return 0; return 0;
} }
static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{ {
/* First we do the gen6_emit_post_sync_nonzero_flush w/a */ /* First we do the gen6_emit_post_sync_nonzero_flush w/a */
*cs++ = GFX_OP_PIPE_CONTROL(4); *cs++ = GFX_OP_PIPE_CONTROL(4);
@ -319,6 +326,11 @@ static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_QW_WRITE |
PIPE_CONTROL_CS_STALL); PIPE_CONTROL_CS_STALL);
*cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT;
*cs++ = rq->fence.seqno;
*cs++ = GFX_OP_PIPE_CONTROL(4);
*cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
*cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT; *cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT;
*cs++ = rq->global_seqno; *cs++ = rq->global_seqno;
@ -327,8 +339,9 @@ static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
rq->tail = intel_ring_offset(rq, cs); rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail); assert_ring_tail_valid(rq->ring, rq->tail);
return cs;
} }
static const int gen6_rcs_emit_breadcrumb_sz = 14;
static int static int
gen7_render_ring_cs_stall_wa(struct i915_request *rq) gen7_render_ring_cs_stall_wa(struct i915_request *rq)
@ -409,7 +422,7 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode)
return 0; return 0;
} }
static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{ {
*cs++ = GFX_OP_PIPE_CONTROL(4); *cs++ = GFX_OP_PIPE_CONTROL(4);
*cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
@ -419,6 +432,13 @@ static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_QW_WRITE |
PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_GLOBAL_GTT_IVB |
PIPE_CONTROL_CS_STALL); PIPE_CONTROL_CS_STALL);
*cs++ = rq->timeline->hwsp_offset;
*cs++ = rq->fence.seqno;
*cs++ = GFX_OP_PIPE_CONTROL(4);
*cs++ = (PIPE_CONTROL_QW_WRITE |
PIPE_CONTROL_GLOBAL_GTT_IVB |
PIPE_CONTROL_CS_STALL);
*cs++ = intel_hws_seqno_address(rq->engine); *cs++ = intel_hws_seqno_address(rq->engine);
*cs++ = rq->global_seqno; *cs++ = rq->global_seqno;
@ -427,34 +447,52 @@ static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
rq->tail = intel_ring_offset(rq, cs); rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail); assert_ring_tail_valid(rq->ring, rq->tail);
}
static const int gen7_rcs_emit_breadcrumb_sz = 6;
static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) return cs;
}
static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{ {
*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW; GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
*cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT; GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
*cs++ = rq->fence.seqno;
*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
*cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT;
*cs++ = rq->global_seqno; *cs++ = rq->global_seqno;
*cs++ = MI_USER_INTERRUPT; *cs++ = MI_USER_INTERRUPT;
*cs++ = MI_NOOP;
rq->tail = intel_ring_offset(rq, cs); rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail); assert_ring_tail_valid(rq->ring, rq->tail);
return cs;
} }
static const int gen6_xcs_emit_breadcrumb_sz = 4;
#define GEN7_XCS_WA 32 #define GEN7_XCS_WA 32
static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{ {
int i; int i;
*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW; GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
*cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT; GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
*cs++ = rq->fence.seqno;
*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
*cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT;
*cs++ = rq->global_seqno; *cs++ = rq->global_seqno;
for (i = 0; i < GEN7_XCS_WA; i++) { for (i = 0; i < GEN7_XCS_WA; i++) {
*cs++ = MI_STORE_DWORD_INDEX; *cs++ = MI_STORE_DWORD_INDEX;
*cs++ = I915_GEM_HWS_INDEX_ADDR; *cs++ = I915_GEM_HWS_SEQNO_ADDR;
*cs++ = rq->global_seqno; *cs++ = rq->fence.seqno;
} }
*cs++ = MI_FLUSH_DW; *cs++ = MI_FLUSH_DW;
@ -462,12 +500,12 @@ static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
*cs++ = 0; *cs++ = 0;
*cs++ = MI_USER_INTERRUPT; *cs++ = MI_USER_INTERRUPT;
*cs++ = MI_NOOP;
rq->tail = intel_ring_offset(rq, cs); rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail); assert_ring_tail_valid(rq->ring, rq->tail);
return cs;
} }
static const int gen7_xcs_emit_breadcrumb_sz = 8 + GEN7_XCS_WA * 3;
#undef GEN7_XCS_WA #undef GEN7_XCS_WA
static void set_hwstam(struct intel_engine_cs *engine, u32 mask) static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
@ -498,12 +536,17 @@ static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)
I915_WRITE(HWS_PGA, addr); I915_WRITE(HWS_PGA, addr);
} }
static struct page *status_page(struct intel_engine_cs *engine)
{
struct drm_i915_gem_object *obj = engine->status_page.vma->obj;
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
return sg_page(obj->mm.pages->sgl);
}
static void ring_setup_phys_status_page(struct intel_engine_cs *engine) static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
{ {
struct page *page = virt_to_page(engine->status_page.page_addr); set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine))));
phys_addr_t phys = PFN_PHYS(page_to_pfn(page));
set_hws_pga(engine, phys);
set_hwstam(engine, ~0u); set_hwstam(engine, ~0u);
} }
@ -570,7 +613,7 @@ static void flush_cs_tlb(struct intel_engine_cs *engine)
static void ring_setup_status_page(struct intel_engine_cs *engine) static void ring_setup_status_page(struct intel_engine_cs *engine)
{ {
set_hwsp(engine, engine->status_page.ggtt_offset); set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma));
set_hwstam(engine, ~0u); set_hwstam(engine, ~0u);
flush_cs_tlb(engine); flush_cs_tlb(engine);
@ -700,59 +743,87 @@ static int init_ring_common(struct intel_engine_cs *engine)
} }
/* Papering over lost _interrupts_ immediately following the restart */ /* Papering over lost _interrupts_ immediately following the restart */
intel_engine_wakeup(engine); intel_engine_queue_breadcrumbs(engine);
out: out:
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
return ret; return ret;
} }
static struct i915_request *reset_prepare(struct intel_engine_cs *engine) static void reset_prepare(struct intel_engine_cs *engine)
{ {
intel_engine_stop_cs(engine); intel_engine_stop_cs(engine);
return i915_gem_find_active_request(engine);
} }
static void skip_request(struct i915_request *rq) static void reset_ring(struct intel_engine_cs *engine, bool stalled)
{ {
void *vaddr = rq->ring->vaddr; struct i915_timeline *tl = &engine->timeline;
struct i915_request *pos, *rq;
unsigned long flags;
u32 head; u32 head;
head = rq->infix; rq = NULL;
if (rq->postfix < head) { spin_lock_irqsave(&tl->lock, flags);
memset32(vaddr + head, MI_NOOP, list_for_each_entry(pos, &tl->requests, link) {
(rq->ring->size - head) / sizeof(u32)); if (!i915_request_completed(pos)) {
head = 0; rq = pos;
break;
} }
memset32(vaddr + head, MI_NOOP, (rq->postfix - head) / sizeof(u32));
} }
static void reset_ring(struct intel_engine_cs *engine, struct i915_request *rq) GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n",
{ engine->name,
GEM_TRACE("%s request global=%d, current=%d\n", rq ? rq->global_seqno : 0,
engine->name, rq ? rq->global_seqno : 0, intel_engine_get_seqno(engine),
intel_engine_get_seqno(engine)); yesno(stalled));
/* /*
* Try to restore the logical GPU state to match the continuation * The guilty request will get skipped on a hung engine.
* of the request queue. If we skip the context/PD restore, then
* the next request may try to execute assuming that its context
* is valid and loaded on the GPU and so may try to access invalid
* memory, prompting repeated GPU hangs.
* *
* If the request was guilty, we still restore the logical state * Users of client default contexts do not rely on logical
* in case the next request requires it (e.g. the aliasing ppgtt), * state preserved between batches so it is safe to execute
* but skip over the hung batch. * queued requests following the hang. Non default contexts
* rely on preserved state, so skipping a batch loses the
* evolution of the state and it needs to be considered corrupted.
* Executing more queued batches on top of corrupted state is
* risky. But we take the risk by trying to advance through
* the queued requests in order to make the client behaviour
* more predictable around resets, by not throwing away random
* amount of batches it has prepared for execution. Sophisticated
* clients can use gem_reset_stats_ioctl and dma fence status
* (exported via sync_file info ioctl on explicit fences) to observe
* when it loses the context state and should rebuild accordingly.
* *
* If the request was innocent, we try to replay the request with * The context ban, and ultimately the client ban, mechanism are safety
* the restored context. * valves if client submission ends up resulting in nothing more than
* subsequent hangs.
*/ */
if (rq) { if (rq) {
/* If the rq hung, jump to its breadcrumb and skip the batch */ /*
rq->ring->head = intel_ring_wrap(rq->ring, rq->head); * Try to restore the logical GPU state to match the
if (rq->fence.error == -EIO) * continuation of the request queue. If we skip the
skip_request(rq); * context/PD restore, then the next request may try to execute
* assuming that its context is valid and loaded on the GPU and
* so may try to access invalid memory, prompting repeated GPU
* hangs.
*
* If the request was guilty, we still restore the logical
* state in case the next request requires it (e.g. the
* aliasing ppgtt), but skip over the hung batch.
*
* If the request was innocent, we try to replay the request
* with the restored context.
*/
i915_reset_request(rq, stalled);
GEM_BUG_ON(rq->ring != engine->buffer);
head = rq->head;
} else {
head = engine->buffer->tail;
} }
engine->buffer->head = intel_ring_wrap(engine->buffer, head);
spin_unlock_irqrestore(&tl->lock, flags);
} }
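The comment above notes that userspace can detect lost context state through the reset-stats ioctl or fence status. As a rough illustration only, and not part of this patch, a client might poll the per-context reset statistics as sketched below; the struct and ioctl names are recalled from include/uapi/drm/i915_drm.h and should be treated as an assumption to verify against the headers you actually build against.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <drm/i915_drm.h>

/* Returns >0 if the context lost batches to a GPU reset (guilty or innocent),
 * 0 if nothing was lost, -1 on ioctl failure. */
static int context_was_reset(int drm_fd, unsigned int ctx_id)
{
        struct drm_i915_reset_stats stats;

        memset(&stats, 0, sizeof(stats));
        stats.ctx_id = ctx_id;

        if (ioctl(drm_fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats))
                return -1;

        /* Any batch counted as active or pending at reset time means the
         * context state can no longer be trusted and must be rebuilt. */
        return stats.batch_active || stats.batch_pending;
}

int main(void)
{
        int fd = open("/dev/dri/renderD128", O_RDWR);

        if (fd < 0)
                return 1;
        printf("default context reset? %d\n", context_was_reset(fd, 0));
        close(fd);
        return 0;
}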
static void reset_finish(struct intel_engine_cs *engine) static void reset_finish(struct intel_engine_cs *engine)
@ -836,10 +907,10 @@ static void cancel_requests(struct intel_engine_cs *engine)
list_for_each_entry(request, &engine->timeline.requests, link) { list_for_each_entry(request, &engine->timeline.requests, link) {
GEM_BUG_ON(!request->global_seqno); GEM_BUG_ON(!request->global_seqno);
if (i915_request_signaled(request)) if (!i915_request_signaled(request))
continue;
dma_fence_set_error(&request->fence, -EIO); dma_fence_set_error(&request->fence, -EIO);
i915_request_mark_complete(request);
} }
intel_write_status_page(engine, intel_write_status_page(engine,
@ -861,29 +932,43 @@ static void i9xx_submit_request(struct i915_request *request)
intel_ring_set_tail(request->ring, request->tail)); intel_ring_set_tail(request->ring, request->tail));
} }
static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{ {
GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
*cs++ = MI_FLUSH; *cs++ = MI_FLUSH;
*cs++ = MI_STORE_DWORD_INDEX;
*cs++ = I915_GEM_HWS_SEQNO_ADDR;
*cs++ = rq->fence.seqno;
*cs++ = MI_STORE_DWORD_INDEX; *cs++ = MI_STORE_DWORD_INDEX;
*cs++ = I915_GEM_HWS_INDEX_ADDR; *cs++ = I915_GEM_HWS_INDEX_ADDR;
*cs++ = rq->global_seqno; *cs++ = rq->global_seqno;
*cs++ = MI_USER_INTERRUPT; *cs++ = MI_USER_INTERRUPT;
*cs++ = MI_NOOP;
rq->tail = intel_ring_offset(rq, cs); rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail); assert_ring_tail_valid(rq->ring, rq->tail);
return cs;
} }
static const int i9xx_emit_breadcrumb_sz = 6;
#define GEN5_WA_STORES 8 /* must be at least 1! */ #define GEN5_WA_STORES 8 /* must be at least 1! */
static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{ {
int i; int i;
GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
*cs++ = MI_FLUSH; *cs++ = MI_FLUSH;
*cs++ = MI_STORE_DWORD_INDEX;
*cs++ = I915_GEM_HWS_SEQNO_ADDR;
*cs++ = rq->fence.seqno;
BUILD_BUG_ON(GEN5_WA_STORES < 1); BUILD_BUG_ON(GEN5_WA_STORES < 1);
for (i = 0; i < GEN5_WA_STORES; i++) { for (i = 0; i < GEN5_WA_STORES; i++) {
*cs++ = MI_STORE_DWORD_INDEX; *cs++ = MI_STORE_DWORD_INDEX;
@ -892,11 +977,13 @@ static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
} }
*cs++ = MI_USER_INTERRUPT; *cs++ = MI_USER_INTERRUPT;
*cs++ = MI_NOOP;
rq->tail = intel_ring_offset(rq, cs); rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail); assert_ring_tail_valid(rq->ring, rq->tail);
return cs;
} }
static const int gen5_emit_breadcrumb_sz = GEN5_WA_STORES * 3 + 2;
#undef GEN5_WA_STORES #undef GEN5_WA_STORES
static void static void
@ -1123,6 +1210,10 @@ int intel_ring_pin(struct intel_ring *ring)
GEM_BUG_ON(ring->vaddr); GEM_BUG_ON(ring->vaddr);
ret = i915_timeline_pin(ring->timeline);
if (ret)
return ret;
flags = PIN_GLOBAL; flags = PIN_GLOBAL;
/* Ring wraparound at offset 0 sometimes hangs. No idea why. */ /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
@ -1139,28 +1230,32 @@ int intel_ring_pin(struct intel_ring *ring)
else else
ret = i915_gem_object_set_to_cpu_domain(vma->obj, true); ret = i915_gem_object_set_to_cpu_domain(vma->obj, true);
if (unlikely(ret)) if (unlikely(ret))
return ret; goto unpin_timeline;
} }
ret = i915_vma_pin(vma, 0, 0, flags); ret = i915_vma_pin(vma, 0, 0, flags);
if (unlikely(ret)) if (unlikely(ret))
return ret; goto unpin_timeline;
if (i915_vma_is_map_and_fenceable(vma)) if (i915_vma_is_map_and_fenceable(vma))
addr = (void __force *)i915_vma_pin_iomap(vma); addr = (void __force *)i915_vma_pin_iomap(vma);
else else
addr = i915_gem_object_pin_map(vma->obj, map); addr = i915_gem_object_pin_map(vma->obj, map);
if (IS_ERR(addr)) if (IS_ERR(addr)) {
goto err; ret = PTR_ERR(addr);
goto unpin_ring;
}
vma->obj->pin_global++; vma->obj->pin_global++;
ring->vaddr = addr; ring->vaddr = addr;
return 0; return 0;
err: unpin_ring:
i915_vma_unpin(vma); i915_vma_unpin(vma);
return PTR_ERR(addr); unpin_timeline:
i915_timeline_unpin(ring->timeline);
return ret;
} }
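The reworked error path in intel_ring_pin() above follows the usual acquire/unwind idiom: each successfully acquired resource adds a label (unpin_timeline, unpin_ring), and later failures jump to the label that releases everything taken so far, in reverse order. A generic self-contained sketch of that idiom, with purely illustrative names:

#include <stdio.h>

static int acquire_a(void) { return 0; }
static void release_a(void) { printf("release a\n"); }
static int acquire_b(void) { return 0; }
static void release_b(void) { printf("release b\n"); }
static int acquire_c(void) { return -1; } /* pretend the last step fails */

static int setup(void)
{
        int ret;

        ret = acquire_a();
        if (ret)
                return ret;

        ret = acquire_b();
        if (ret)
                goto err_a;

        ret = acquire_c();
        if (ret)
                goto err_b;

        return 0;

err_b:
        release_b();
err_a:
        release_a();
        return ret;
}

int main(void)
{
        printf("setup() = %d\n", setup());
        return 0;
}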
void intel_ring_reset(struct intel_ring *ring, u32 tail) void intel_ring_reset(struct intel_ring *ring, u32 tail)
@ -1189,6 +1284,8 @@ void intel_ring_unpin(struct intel_ring *ring)
ring->vma->obj->pin_global--; ring->vma->obj->pin_global--;
i915_vma_unpin(ring->vma); i915_vma_unpin(ring->vma);
i915_timeline_unpin(ring->timeline);
} }
static struct i915_vma * static struct i915_vma *
@ -1499,13 +1596,18 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
struct intel_ring *ring; struct intel_ring *ring;
int err; int err;
intel_engine_setup_common(engine); err = intel_engine_setup_common(engine);
if (err)
return err;
timeline = i915_timeline_create(engine->i915, engine->name); timeline = i915_timeline_create(engine->i915,
engine->name,
engine->status_page.vma);
if (IS_ERR(timeline)) { if (IS_ERR(timeline)) {
err = PTR_ERR(timeline); err = PTR_ERR(timeline);
goto err; goto err;
} }
GEM_BUG_ON(timeline->has_initial_breadcrumb);
ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE); ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE);
i915_timeline_put(timeline); i915_timeline_put(timeline);
@ -1525,6 +1627,8 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
if (err) if (err)
goto err_unpin; goto err_unpin;
GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma);
return 0; return 0;
err_unpin: err_unpin:
@ -1857,6 +1961,7 @@ static int ring_request_alloc(struct i915_request *request)
int ret; int ret;
GEM_BUG_ON(!request->hw_context->pin_count); GEM_BUG_ON(!request->hw_context->pin_count);
GEM_BUG_ON(request->timeline->has_initial_breadcrumb);
/* /*
* Flush enough space to reduce the likelihood of waiting after * Flush enough space to reduce the likelihood of waiting after
@ -2193,12 +2298,14 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
engine->context_pin = intel_ring_context_pin; engine->context_pin = intel_ring_context_pin;
engine->request_alloc = ring_request_alloc; engine->request_alloc = ring_request_alloc;
engine->emit_breadcrumb = i9xx_emit_breadcrumb; /*
engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz; * Using a global execution timeline; the previous final breadcrumb is
if (IS_GEN(dev_priv, 5)) { * equivalent to our next initial breadcrumb so we can elide
engine->emit_breadcrumb = gen5_emit_breadcrumb; * engine->emit_init_breadcrumb().
engine->emit_breadcrumb_sz = gen5_emit_breadcrumb_sz; */
} engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb;
if (IS_GEN(dev_priv, 5))
engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;
engine->set_default_submission = i9xx_set_default_submission; engine->set_default_submission = i9xx_set_default_submission;
@ -2227,13 +2334,11 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
if (INTEL_GEN(dev_priv) >= 7) { if (INTEL_GEN(dev_priv) >= 7) {
engine->init_context = intel_rcs_ctx_init; engine->init_context = intel_rcs_ctx_init;
engine->emit_flush = gen7_render_ring_flush; engine->emit_flush = gen7_render_ring_flush;
engine->emit_breadcrumb = gen7_rcs_emit_breadcrumb; engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb;
engine->emit_breadcrumb_sz = gen7_rcs_emit_breadcrumb_sz;
} else if (IS_GEN(dev_priv, 6)) { } else if (IS_GEN(dev_priv, 6)) {
engine->init_context = intel_rcs_ctx_init; engine->init_context = intel_rcs_ctx_init;
engine->emit_flush = gen6_render_ring_flush; engine->emit_flush = gen6_render_ring_flush;
engine->emit_breadcrumb = gen6_rcs_emit_breadcrumb; engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb;
engine->emit_breadcrumb_sz = gen6_rcs_emit_breadcrumb_sz;
} else if (IS_GEN(dev_priv, 5)) { } else if (IS_GEN(dev_priv, 5)) {
engine->emit_flush = gen4_render_ring_flush; engine->emit_flush = gen4_render_ring_flush;
} else { } else {
@ -2269,13 +2374,10 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
engine->emit_flush = gen6_bsd_ring_flush; engine->emit_flush = gen6_bsd_ring_flush;
engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
if (IS_GEN(dev_priv, 6)) { if (IS_GEN(dev_priv, 6))
engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb; engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz; else
} else { engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb;
engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
}
} else { } else {
engine->emit_flush = bsd_ring_flush; engine->emit_flush = bsd_ring_flush;
if (IS_GEN(dev_priv, 5)) if (IS_GEN(dev_priv, 5))
@ -2298,13 +2400,10 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
engine->emit_flush = gen6_ring_flush; engine->emit_flush = gen6_ring_flush;
engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
if (IS_GEN(dev_priv, 6)) { if (IS_GEN(dev_priv, 6))
engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb; engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz; else
} else { engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb;
engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
}
return intel_init_ring_buffer(engine); return intel_init_ring_buffer(engine);
} }
@ -2322,8 +2421,7 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
engine->irq_enable = hsw_vebox_irq_enable; engine->irq_enable = hsw_vebox_irq_enable;
engine->irq_disable = hsw_vebox_irq_disable; engine->irq_disable = hsw_vebox_irq_disable;
engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
return intel_init_ring_buffer(engine); return intel_init_ring_buffer(engine);
} }


@ -5,6 +5,7 @@
#include <drm/drm_util.h> #include <drm/drm_util.h>
#include <linux/hashtable.h> #include <linux/hashtable.h>
#include <linux/irq_work.h>
#include <linux/seqlock.h> #include <linux/seqlock.h>
#include "i915_gem_batch_pool.h" #include "i915_gem_batch_pool.h"
@ -32,8 +33,7 @@ struct i915_sched_attr;
struct intel_hw_status_page { struct intel_hw_status_page {
struct i915_vma *vma; struct i915_vma *vma;
u32 *page_addr; u32 *addr;
u32 ggtt_offset;
}; };
#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base)) #define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
@ -120,13 +120,8 @@ struct intel_instdone {
struct intel_engine_hangcheck { struct intel_engine_hangcheck {
u64 acthd; u64 acthd;
u32 seqno; u32 seqno;
enum intel_engine_hangcheck_action action;
unsigned long action_timestamp; unsigned long action_timestamp;
int deadlock;
struct intel_instdone instdone; struct intel_instdone instdone;
struct i915_request *active_request;
bool stalled:1;
bool wedged:1;
}; };
struct intel_ring { struct intel_ring {
@ -209,6 +204,7 @@ struct i915_priolist {
struct st_preempt_hang { struct st_preempt_hang {
struct completion completion; struct completion completion;
unsigned int count;
bool inject_hang; bool inject_hang;
}; };
@ -299,14 +295,18 @@ struct intel_engine_execlists {
unsigned int port_mask; unsigned int port_mask;
/** /**
* @queue_priority: Highest pending priority. * @queue_priority_hint: Highest pending priority.
* *
* When we add requests into the queue, or adjust the priority of * When we add requests into the queue, or adjust the priority of
* executing requests, we compute the maximum priority of those * executing requests, we compute the maximum priority of those
* pending requests. We can then use this value to determine if * pending requests. We can then use this value to determine if
* we need to preempt the executing requests to service the queue. * we need to preempt the executing requests to service the queue.
* However, since we may have recorded the priority of an inflight
* request that we wanted to preempt but that has since completed, by
* the time of dequeuing the priority hint may no longer match the
* highest available request priority.
*/ */
int queue_priority; int queue_priority_hint;
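As a rough sketch of how such a hint tends to be maintained (hypothetical helper, not code from this change): queueing keeps a running maximum, while the dequeue path re-derives the true maximum from the priority lists whenever the hint turns out to be stale.

static void update_queue_priority_hint(struct intel_engine_execlists *el,
				       int prio)
{
	/* Running maximum only; dequeue recomputes the truth from @queue. */
	if (prio > el->queue_priority_hint)
		el->queue_priority_hint = prio;
}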
/** /**
* @queue: queue of requests, in priority lists * @queue: queue of requests, in priority lists
@ -382,22 +382,14 @@ struct intel_engine_cs {
* the overhead of waking that client is much preferred. * the overhead of waking that client is much preferred.
*/ */
struct intel_breadcrumbs { struct intel_breadcrumbs {
spinlock_t irq_lock; /* protects irq_*; irqsafe */ spinlock_t irq_lock;
struct intel_wait *irq_wait; /* oldest waiter by retirement */ struct list_head signalers;
spinlock_t rb_lock; /* protects the rb and wraps irq_lock */ struct irq_work irq_work; /* for use from inside irq_lock */
struct rb_root waiters; /* sorted by retirement, priority */
struct list_head signals; /* sorted by retirement */
struct task_struct *signaler; /* used for fence signalling */
struct timer_list fake_irq; /* used after a missed interrupt */
struct timer_list hangcheck; /* detect missed interrupts */
unsigned int hangcheck_interrupts;
unsigned int irq_enabled; unsigned int irq_enabled;
unsigned int irq_count;
bool irq_armed : 1; bool irq_armed;
} breadcrumbs; } breadcrumbs;
struct { struct {
@ -444,9 +436,8 @@ struct intel_engine_cs {
int (*init_hw)(struct intel_engine_cs *engine); int (*init_hw)(struct intel_engine_cs *engine);
struct { struct {
struct i915_request *(*prepare)(struct intel_engine_cs *engine); void (*prepare)(struct intel_engine_cs *engine);
void (*reset)(struct intel_engine_cs *engine, void (*reset)(struct intel_engine_cs *engine, bool stalled);
struct i915_request *rq);
void (*finish)(struct intel_engine_cs *engine); void (*finish)(struct intel_engine_cs *engine);
} reset; } reset;
@ -470,8 +461,10 @@ struct intel_engine_cs {
unsigned int dispatch_flags); unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0) #define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1) #define I915_DISPATCH_PINNED BIT(1)
void (*emit_breadcrumb)(struct i915_request *rq, u32 *cs); int (*emit_init_breadcrumb)(struct i915_request *rq);
int emit_breadcrumb_sz; u32 *(*emit_fini_breadcrumb)(struct i915_request *rq,
u32 *cs);
unsigned int emit_fini_breadcrumb_dw;
/* Pass the request to the hardware queue (e.g. directly into /* Pass the request to the hardware queue (e.g. directly into
* the legacy ringbuffer or to the end of an execlist). * the legacy ringbuffer or to the end of an execlist).
@ -677,7 +670,7 @@ static inline u32
intel_read_status_page(const struct intel_engine_cs *engine, int reg) intel_read_status_page(const struct intel_engine_cs *engine, int reg)
{ {
/* Ensure that the compiler doesn't optimize away the load. */ /* Ensure that the compiler doesn't optimize away the load. */
return READ_ONCE(engine->status_page.page_addr[reg]); return READ_ONCE(engine->status_page.addr[reg]);
} }
static inline void static inline void
@ -690,12 +683,12 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
*/ */
if (static_cpu_has(X86_FEATURE_CLFLUSH)) { if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
mb(); mb();
clflush(&engine->status_page.page_addr[reg]); clflush(&engine->status_page.addr[reg]);
engine->status_page.page_addr[reg] = value; engine->status_page.addr[reg] = value;
clflush(&engine->status_page.page_addr[reg]); clflush(&engine->status_page.addr[reg]);
mb(); mb();
} else { } else {
WRITE_ONCE(engine->status_page.page_addr[reg], value); WRITE_ONCE(engine->status_page.addr[reg], value);
} }
} }
@ -716,11 +709,13 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
* The area from dword 0x30 to 0x3ff is available for driver usage. * The area from dword 0x30 to 0x3ff is available for driver usage.
*/ */
#define I915_GEM_HWS_INDEX 0x30 #define I915_GEM_HWS_INDEX 0x30
#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT) #define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX * sizeof(u32))
#define I915_GEM_HWS_PREEMPT_INDEX 0x32 #define I915_GEM_HWS_PREEMPT 0x32
#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT) #define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32))
#define I915_GEM_HWS_SCRATCH_INDEX 0x40 #define I915_GEM_HWS_SEQNO 0x40
#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) #define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32))
#define I915_GEM_HWS_SCRATCH 0x80
#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH * sizeof(u32))
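For orientation, the renamed offsets are plain byte addresses into the hardware status page, i.e. the dword index times sizeof(u32); the arithmetic works out as follows (spelled out for illustration only, not code from this change):

/*
 * I915_GEM_HWS_INDEX_ADDR   = 0x30 * 4 = 0x0c0
 * I915_GEM_HWS_PREEMPT_ADDR = 0x32 * 4 = 0x0c8
 * I915_GEM_HWS_SEQNO_ADDR   = 0x40 * 4 = 0x100
 * I915_GEM_HWS_SCRATCH_ADDR = 0x80 * 4 = 0x200
 */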
#define I915_HWS_CSB_BUF0_INDEX 0x10 #define I915_HWS_CSB_BUF0_INDEX 0x10
#define I915_HWS_CSB_WRITE_INDEX 0x1f #define I915_HWS_CSB_WRITE_INDEX 0x1f
@ -825,7 +820,7 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno); void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno);
void intel_engine_setup_common(struct intel_engine_cs *engine); int intel_engine_setup_common(struct intel_engine_cs *engine);
int intel_engine_init_common(struct intel_engine_cs *engine); int intel_engine_init_common(struct intel_engine_cs *engine);
void intel_engine_cleanup_common(struct intel_engine_cs *engine); void intel_engine_cleanup_common(struct intel_engine_cs *engine);
@ -883,93 +878,29 @@ static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
void intel_engine_get_instdone(struct intel_engine_cs *engine, void intel_engine_get_instdone(struct intel_engine_cs *engine,
struct intel_instdone *instdone); struct intel_instdone *instdone);
static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
{ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
}
static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
{
return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR;
}
/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
static inline void intel_wait_init(struct intel_wait *wait)
{
wait->tsk = current;
wait->request = NULL;
}
static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno)
{
wait->tsk = current;
wait->seqno = seqno;
}
static inline bool intel_wait_has_seqno(const struct intel_wait *wait)
{
return wait->seqno;
}
static inline bool
intel_wait_update_seqno(struct intel_wait *wait, u32 seqno)
{
wait->seqno = seqno;
return intel_wait_has_seqno(wait);
}
static inline bool
intel_wait_update_request(struct intel_wait *wait,
const struct i915_request *rq)
{
return intel_wait_update_seqno(wait, i915_request_global_seqno(rq));
}
static inline bool
intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno)
{
return wait->seqno == seqno;
}
static inline bool
intel_wait_check_request(const struct intel_wait *wait,
const struct i915_request *rq)
{
return intel_wait_check_seqno(wait, i915_request_global_seqno(rq));
}
static inline bool intel_wait_complete(const struct intel_wait *wait)
{
return RB_EMPTY_NODE(&wait->node);
}
bool intel_engine_add_wait(struct intel_engine_cs *engine,
struct intel_wait *wait);
void intel_engine_remove_wait(struct intel_engine_cs *engine,
struct intel_wait *wait);
bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup);
void intel_engine_cancel_signaling(struct i915_request *request);
static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
{
return READ_ONCE(engine->breadcrumbs.irq_wait);
}
unsigned int intel_engine_wakeup(struct intel_engine_cs *engine);
#define ENGINE_WAKEUP_WAITER BIT(0)
#define ENGINE_WAKEUP_ASLEEP BIT(1)
void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine); void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine); void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);
void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); bool intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
static inline void
intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine)
{
irq_work_queue(&engine->breadcrumbs.irq_work);
}
bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine);
void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
struct drm_printer *p);
static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
{ {
memset(batch, 0, 6 * sizeof(u32)); memset(batch, 0, 6 * sizeof(u32));
@ -1018,6 +949,13 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
return cs; return cs;
} }
static inline void intel_engine_reset(struct intel_engine_cs *engine,
bool stalled)
{
if (engine->reset.reset)
engine->reset.reset(engine, stalled);
}
void intel_engines_sanitize(struct drm_i915_private *i915, bool force); void intel_engines_sanitize(struct drm_i915_private *i915, bool force);
bool intel_engine_is_idle(struct intel_engine_cs *engine); bool intel_engine_is_idle(struct intel_engine_cs *engine);


@ -493,7 +493,7 @@ skl_program_plane(struct intel_plane *plane,
keymax = (key->max_value & 0xffffff) | PLANE_KEYMAX_ALPHA(alpha); keymax = (key->max_value & 0xffffff) | PLANE_KEYMAX_ALPHA(alpha);
keymsk = key->channel_mask & 0x3ffffff; keymsk = key->channel_mask & 0x7ffffff;
if (alpha < 0xff) if (alpha < 0xff)
keymsk |= PLANE_KEYMSK_ALPHA_ENABLE; keymsk |= PLANE_KEYMSK_ALPHA_ENABLE;


@ -306,7 +306,7 @@ struct tv_mode {
u32 clock; u32 clock;
u16 refresh; /* in millihertz (for precision) */ u16 refresh; /* in millihertz (for precision) */
u32 oversample; u8 oversample;
u8 hsync_end; u8 hsync_end;
u16 hblank_start, hblank_end, htotal; u16 hblank_start, hblank_end, htotal;
bool progressive : 1, trilevel_sync : 1, component_only : 1; bool progressive : 1, trilevel_sync : 1, component_only : 1;
@ -339,7 +339,6 @@ struct tv_mode {
const struct video_levels *composite_levels, *svideo_levels; const struct video_levels *composite_levels, *svideo_levels;
const struct color_conversion *composite_color, *svideo_color; const struct color_conversion *composite_color, *svideo_color;
const u32 *filter_table; const u32 *filter_table;
u16 max_srcw;
}; };
@ -378,8 +377,8 @@ static const struct tv_mode tv_modes[] = {
.name = "NTSC-M", .name = "NTSC-M",
.clock = 108000, .clock = 108000,
.refresh = 59940, .refresh = 59940,
.oversample = TV_OVERSAMPLE_8X, .oversample = 8,
.component_only = 0, .component_only = false,
/* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */ /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */
.hsync_end = 64, .hblank_end = 124, .hsync_end = 64, .hblank_end = 124,
@ -421,8 +420,8 @@ static const struct tv_mode tv_modes[] = {
.name = "NTSC-443", .name = "NTSC-443",
.clock = 108000, .clock = 108000,
.refresh = 59940, .refresh = 59940,
.oversample = TV_OVERSAMPLE_8X, .oversample = 8,
.component_only = 0, .component_only = false,
/* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 4.43MHz */ /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 4.43MHz */
.hsync_end = 64, .hblank_end = 124, .hsync_end = 64, .hblank_end = 124,
.hblank_start = 836, .htotal = 857, .hblank_start = 836, .htotal = 857,
@ -463,8 +462,8 @@ static const struct tv_mode tv_modes[] = {
.name = "NTSC-J", .name = "NTSC-J",
.clock = 108000, .clock = 108000,
.refresh = 59940, .refresh = 59940,
.oversample = TV_OVERSAMPLE_8X, .oversample = 8,
.component_only = 0, .component_only = false,
/* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */ /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */
.hsync_end = 64, .hblank_end = 124, .hsync_end = 64, .hblank_end = 124,
@ -506,8 +505,8 @@ static const struct tv_mode tv_modes[] = {
.name = "PAL-M", .name = "PAL-M",
.clock = 108000, .clock = 108000,
.refresh = 59940, .refresh = 59940,
.oversample = TV_OVERSAMPLE_8X, .oversample = 8,
.component_only = 0, .component_only = false,
/* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */ /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */
.hsync_end = 64, .hblank_end = 124, .hsync_end = 64, .hblank_end = 124,
@ -550,8 +549,8 @@ static const struct tv_mode tv_modes[] = {
.name = "PAL-N", .name = "PAL-N",
.clock = 108000, .clock = 108000,
.refresh = 50000, .refresh = 50000,
.oversample = TV_OVERSAMPLE_8X, .oversample = 8,
.component_only = 0, .component_only = false,
.hsync_end = 64, .hblank_end = 128, .hsync_end = 64, .hblank_end = 128,
.hblank_start = 844, .htotal = 863, .hblank_start = 844, .htotal = 863,
@ -595,8 +594,8 @@ static const struct tv_mode tv_modes[] = {
.name = "PAL", .name = "PAL",
.clock = 108000, .clock = 108000,
.refresh = 50000, .refresh = 50000,
.oversample = TV_OVERSAMPLE_8X, .oversample = 8,
.component_only = 0, .component_only = false,
.hsync_end = 64, .hblank_end = 142, .hsync_end = 64, .hblank_end = 142,
.hblank_start = 844, .htotal = 863, .hblank_start = 844, .htotal = 863,
@ -635,10 +634,10 @@ static const struct tv_mode tv_modes[] = {
}, },
{ {
.name = "480p", .name = "480p",
.clock = 107520, .clock = 108000,
.refresh = 59940, .refresh = 59940,
.oversample = TV_OVERSAMPLE_4X, .oversample = 4,
.component_only = 1, .component_only = true,
.hsync_end = 64, .hblank_end = 122, .hsync_end = 64, .hblank_end = 122,
.hblank_start = 842, .htotal = 857, .hblank_start = 842, .htotal = 857,
@ -659,10 +658,10 @@ static const struct tv_mode tv_modes[] = {
}, },
{ {
.name = "576p", .name = "576p",
.clock = 107520, .clock = 108000,
.refresh = 50000, .refresh = 50000,
.oversample = TV_OVERSAMPLE_4X, .oversample = 4,
.component_only = 1, .component_only = true,
.hsync_end = 64, .hblank_end = 139, .hsync_end = 64, .hblank_end = 139,
.hblank_start = 859, .htotal = 863, .hblank_start = 859, .htotal = 863,
@ -683,10 +682,10 @@ static const struct tv_mode tv_modes[] = {
}, },
{ {
.name = "720p@60Hz", .name = "720p@60Hz",
.clock = 148800, .clock = 148500,
.refresh = 60000, .refresh = 60000,
.oversample = TV_OVERSAMPLE_2X, .oversample = 2,
.component_only = 1, .component_only = true,
.hsync_end = 80, .hblank_end = 300, .hsync_end = 80, .hblank_end = 300,
.hblank_start = 1580, .htotal = 1649, .hblank_start = 1580, .htotal = 1649,
@ -707,10 +706,10 @@ static const struct tv_mode tv_modes[] = {
}, },
{ {
.name = "720p@50Hz", .name = "720p@50Hz",
.clock = 148800, .clock = 148500,
.refresh = 50000, .refresh = 50000,
.oversample = TV_OVERSAMPLE_2X, .oversample = 2,
.component_only = 1, .component_only = true,
.hsync_end = 80, .hblank_end = 300, .hsync_end = 80, .hblank_end = 300,
.hblank_start = 1580, .htotal = 1979, .hblank_start = 1580, .htotal = 1979,
@ -728,14 +727,13 @@ static const struct tv_mode tv_modes[] = {
.burst_ena = false, .burst_ena = false,
.filter_table = filter_table, .filter_table = filter_table,
.max_srcw = 800
}, },
{ {
.name = "1080i@50Hz", .name = "1080i@50Hz",
.clock = 148800, .clock = 148500,
.refresh = 50000, .refresh = 50000,
.oversample = TV_OVERSAMPLE_2X, .oversample = 2,
.component_only = 1, .component_only = true,
.hsync_end = 88, .hblank_end = 235, .hsync_end = 88, .hblank_end = 235,
.hblank_start = 2155, .htotal = 2639, .hblank_start = 2155, .htotal = 2639,
@ -758,10 +756,10 @@ static const struct tv_mode tv_modes[] = {
}, },
{ {
.name = "1080i@60Hz", .name = "1080i@60Hz",
.clock = 148800, .clock = 148500,
.refresh = 60000, .refresh = 60000,
.oversample = TV_OVERSAMPLE_2X, .oversample = 2,
.component_only = 1, .component_only = true,
.hsync_end = 88, .hblank_end = 235, .hsync_end = 88, .hblank_end = 235,
.hblank_start = 2155, .htotal = 2199, .hblank_start = 2155, .htotal = 2199,
@ -782,8 +780,115 @@ static const struct tv_mode tv_modes[] = {
.filter_table = filter_table, .filter_table = filter_table,
}, },
{
.name = "1080p@30Hz",
.clock = 148500,
.refresh = 30000,
.oversample = 2,
.component_only = true,
.hsync_end = 88, .hblank_end = 235,
.hblank_start = 2155, .htotal = 2199,
.progressive = true, .trilevel_sync = true,
.vsync_start_f1 = 8, .vsync_start_f2 = 8,
.vsync_len = 10,
.veq_ena = false, .veq_start_f1 = 0,
.veq_start_f2 = 0, .veq_len = 0,
.vi_end_f1 = 44, .vi_end_f2 = 44,
.nbr_end = 1079,
.burst_ena = false,
.filter_table = filter_table,
},
{
.name = "1080p@50Hz",
.clock = 148500,
.refresh = 50000,
.oversample = 1,
.component_only = true,
.hsync_end = 88, .hblank_end = 235,
.hblank_start = 2155, .htotal = 2639,
.progressive = true, .trilevel_sync = true,
.vsync_start_f1 = 8, .vsync_start_f2 = 8,
.vsync_len = 10,
.veq_ena = false, .veq_start_f1 = 0,
.veq_start_f2 = 0, .veq_len = 0,
.vi_end_f1 = 44, .vi_end_f2 = 44,
.nbr_end = 1079,
.burst_ena = false,
.filter_table = filter_table,
},
{
.name = "1080p@60Hz",
.clock = 148500,
.refresh = 60000,
.oversample = 1,
.component_only = true,
.hsync_end = 88, .hblank_end = 235,
.hblank_start = 2155, .htotal = 2199,
.progressive = true, .trilevel_sync = true,
.vsync_start_f1 = 8, .vsync_start_f2 = 8,
.vsync_len = 10,
.veq_ena = false, .veq_start_f1 = 0,
.veq_start_f2 = 0, .veq_len = 0,
.vi_end_f1 = 44, .vi_end_f2 = 44,
.nbr_end = 1079,
.burst_ena = false,
.filter_table = filter_table,
},
}; };
struct intel_tv_connector_state {
struct drm_connector_state base;
/*
* May need to override the user margins for vertical centering
* of >1024 pixel wide sources on gen3.
*/
struct {
u16 top, bottom;
} margins;
bool bypass_vfilter;
};
#define to_intel_tv_connector_state(x) container_of(x, struct intel_tv_connector_state, base)
static struct drm_connector_state *
intel_tv_connector_duplicate_state(struct drm_connector *connector)
{
struct intel_tv_connector_state *state;
state = kmemdup(connector->state, sizeof(*state), GFP_KERNEL);
if (!state)
return NULL;
__drm_atomic_helper_connector_duplicate_state(connector, &state->base);
return &state->base;
}
static struct intel_tv *enc_to_tv(struct intel_encoder *encoder) static struct intel_tv *enc_to_tv(struct intel_encoder *encoder)
{ {
return container_of(encoder, struct intel_tv, base); return container_of(encoder, struct intel_tv, base);
@ -859,14 +964,215 @@ intel_tv_mode_valid(struct drm_connector *connector,
return MODE_CLOCK_RANGE; return MODE_CLOCK_RANGE;
} }
static int
intel_tv_mode_vdisplay(const struct tv_mode *tv_mode)
{
if (tv_mode->progressive)
return tv_mode->nbr_end + 1;
else
return 2 * (tv_mode->nbr_end + 1);
}
static void
intel_tv_mode_to_mode(struct drm_display_mode *mode,
const struct tv_mode *tv_mode)
{
mode->clock = tv_mode->clock /
(tv_mode->oversample >> !tv_mode->progressive);
/*
* tv_mode horizontal timings:
*
* hsync_end
* | hblank_end
* | | hblank_start
* | | | htotal
* | _______ |
* ____/ \___
* \__/ \
*/
mode->hdisplay =
tv_mode->hblank_start - tv_mode->hblank_end;
mode->hsync_start = mode->hdisplay +
tv_mode->htotal - tv_mode->hblank_start;
mode->hsync_end = mode->hsync_start +
tv_mode->hsync_end;
mode->htotal = tv_mode->htotal + 1;
/*
* tv_mode vertical timings:
*
* vsync_start
* | vsync_end
* | | vi_end nbr_end
* | | | |
* | | _______
* \__ ____/ \
* \__/
*/
mode->vdisplay = intel_tv_mode_vdisplay(tv_mode);
if (tv_mode->progressive) {
mode->vsync_start = mode->vdisplay +
tv_mode->vsync_start_f1 + 1;
mode->vsync_end = mode->vsync_start +
tv_mode->vsync_len;
mode->vtotal = mode->vdisplay +
tv_mode->vi_end_f1 + 1;
} else {
mode->vsync_start = mode->vdisplay +
tv_mode->vsync_start_f1 + 1 +
tv_mode->vsync_start_f2 + 1;
mode->vsync_end = mode->vsync_start +
2 * tv_mode->vsync_len;
mode->vtotal = mode->vdisplay +
tv_mode->vi_end_f1 + 1 +
tv_mode->vi_end_f2 + 1;
}
/* TV has its own notion of sync and other mode flags, so clear them. */
mode->flags = 0;
mode->vrefresh = 0;
mode->vrefresh = drm_mode_vrefresh(mode);
snprintf(mode->name, sizeof(mode->name),
"%dx%d%c (%s)",
mode->hdisplay, mode->vdisplay,
tv_mode->progressive ? 'p' : 'i',
tv_mode->name);
}
static void intel_tv_scale_mode_horiz(struct drm_display_mode *mode,
int hdisplay, int left_margin,
int right_margin)
{
int hsync_start = mode->hsync_start - mode->hdisplay + right_margin;
int hsync_end = mode->hsync_end - mode->hdisplay + right_margin;
int new_htotal = mode->htotal * hdisplay /
(mode->hdisplay - left_margin - right_margin);
mode->clock = mode->clock * new_htotal / mode->htotal;
mode->hdisplay = hdisplay;
mode->hsync_start = hdisplay + hsync_start * new_htotal / mode->htotal;
mode->hsync_end = hdisplay + hsync_end * new_htotal / mode->htotal;
mode->htotal = new_htotal;
}
static void intel_tv_scale_mode_vert(struct drm_display_mode *mode,
int vdisplay, int top_margin,
int bottom_margin)
{
int vsync_start = mode->vsync_start - mode->vdisplay + bottom_margin;
int vsync_end = mode->vsync_end - mode->vdisplay + bottom_margin;
int new_vtotal = mode->vtotal * vdisplay /
(mode->vdisplay - top_margin - bottom_margin);
mode->clock = mode->clock * new_vtotal / mode->vtotal;
mode->vdisplay = vdisplay;
mode->vsync_start = vdisplay + vsync_start * new_vtotal / mode->vtotal;
mode->vsync_end = vdisplay + vsync_end * new_vtotal / mode->vtotal;
mode->vtotal = new_vtotal;
}
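A quick sanity check of the horizontal rescale, assuming the 712-wide NTSC-443 mode derived above is mapped onto a hypothetical 640 pixel source with zero margins:

/*
 * new_htotal = 858 * 640 / 712   = 771
 * new clock  = 27000 * 771 / 858 = 24262 kHz
 * hdisplay   = 640 (sync positions are rescaled by the same 771/858 ratio)
 */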
static void static void
intel_tv_get_config(struct intel_encoder *encoder, intel_tv_get_config(struct intel_encoder *encoder,
struct intel_crtc_state *pipe_config) struct intel_crtc_state *pipe_config)
{ {
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
struct drm_display_mode *adjusted_mode =
&pipe_config->base.adjusted_mode;
struct drm_display_mode mode = {};
u32 tv_ctl, hctl1, hctl3, vctl1, vctl2, tmp;
struct tv_mode tv_mode = {};
int hdisplay = adjusted_mode->crtc_hdisplay;
int vdisplay = adjusted_mode->crtc_vdisplay;
int xsize, ysize, xpos, ypos;
pipe_config->output_types |= BIT(INTEL_OUTPUT_TVOUT); pipe_config->output_types |= BIT(INTEL_OUTPUT_TVOUT);
pipe_config->base.adjusted_mode.crtc_clock = pipe_config->port_clock; tv_ctl = I915_READ(TV_CTL);
hctl1 = I915_READ(TV_H_CTL_1);
hctl3 = I915_READ(TV_H_CTL_3);
vctl1 = I915_READ(TV_V_CTL_1);
vctl2 = I915_READ(TV_V_CTL_2);
tv_mode.htotal = (hctl1 & TV_HTOTAL_MASK) >> TV_HTOTAL_SHIFT;
tv_mode.hsync_end = (hctl1 & TV_HSYNC_END_MASK) >> TV_HSYNC_END_SHIFT;
tv_mode.hblank_start = (hctl3 & TV_HBLANK_START_MASK) >> TV_HBLANK_START_SHIFT;
tv_mode.hblank_end = (hctl3 & TV_HSYNC_END_MASK) >> TV_HBLANK_END_SHIFT;
tv_mode.nbr_end = (vctl1 & TV_NBR_END_MASK) >> TV_NBR_END_SHIFT;
tv_mode.vi_end_f1 = (vctl1 & TV_VI_END_F1_MASK) >> TV_VI_END_F1_SHIFT;
tv_mode.vi_end_f2 = (vctl1 & TV_VI_END_F2_MASK) >> TV_VI_END_F2_SHIFT;
tv_mode.vsync_len = (vctl2 & TV_VSYNC_LEN_MASK) >> TV_VSYNC_LEN_SHIFT;
tv_mode.vsync_start_f1 = (vctl2 & TV_VSYNC_START_F1_MASK) >> TV_VSYNC_START_F1_SHIFT;
tv_mode.vsync_start_f2 = (vctl2 & TV_VSYNC_START_F2_MASK) >> TV_VSYNC_START_F2_SHIFT;
tv_mode.clock = pipe_config->port_clock;
tv_mode.progressive = tv_ctl & TV_PROGRESSIVE;
switch (tv_ctl & TV_OVERSAMPLE_MASK) {
case TV_OVERSAMPLE_8X:
tv_mode.oversample = 8;
break;
case TV_OVERSAMPLE_4X:
tv_mode.oversample = 4;
break;
case TV_OVERSAMPLE_2X:
tv_mode.oversample = 2;
break;
default:
tv_mode.oversample = 1;
break;
}
tmp = I915_READ(TV_WIN_POS);
xpos = tmp >> 16;
ypos = tmp & 0xffff;
tmp = I915_READ(TV_WIN_SIZE);
xsize = tmp >> 16;
ysize = tmp & 0xffff;
intel_tv_mode_to_mode(&mode, &tv_mode);
DRM_DEBUG_KMS("TV mode:\n");
drm_mode_debug_printmodeline(&mode);
intel_tv_scale_mode_horiz(&mode, hdisplay,
xpos, mode.hdisplay - xsize - xpos);
intel_tv_scale_mode_vert(&mode, vdisplay,
ypos, mode.vdisplay - ysize - ypos);
adjusted_mode->crtc_clock = mode.clock;
if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE)
adjusted_mode->crtc_clock /= 2;
/* pixel counter doesn't work on i965gm TV output */
if (IS_I965GM(dev_priv))
adjusted_mode->private_flags |=
I915_MODE_FLAG_USE_SCANLINE_COUNTER;
}
static bool intel_tv_source_too_wide(struct drm_i915_private *dev_priv,
int hdisplay)
{
return IS_GEN(dev_priv, 3) && hdisplay > 1024;
}
static bool intel_tv_vert_scaling(const struct drm_display_mode *tv_mode,
const struct drm_connector_state *conn_state,
int vdisplay)
{
return tv_mode->crtc_vdisplay -
conn_state->tv.margins.top -
conn_state->tv.margins.bottom !=
vdisplay;
} }
static int static int
@ -874,9 +1180,14 @@ intel_tv_compute_config(struct intel_encoder *encoder,
struct intel_crtc_state *pipe_config, struct intel_crtc_state *pipe_config,
struct drm_connector_state *conn_state) struct drm_connector_state *conn_state)
{ {
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
struct intel_tv_connector_state *tv_conn_state =
to_intel_tv_connector_state(conn_state);
const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state); const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state);
struct drm_display_mode *adjusted_mode = struct drm_display_mode *adjusted_mode =
&pipe_config->base.adjusted_mode; &pipe_config->base.adjusted_mode;
int hdisplay = adjusted_mode->crtc_hdisplay;
int vdisplay = adjusted_mode->crtc_vdisplay;
if (!tv_mode) if (!tv_mode)
return -EINVAL; return -EINVAL;
@ -885,17 +1196,136 @@ intel_tv_compute_config(struct intel_encoder *encoder,
return -EINVAL; return -EINVAL;
pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB;
adjusted_mode->crtc_clock = tv_mode->clock;
DRM_DEBUG_KMS("forcing bpc to 8 for TV\n"); DRM_DEBUG_KMS("forcing bpc to 8 for TV\n");
pipe_config->pipe_bpp = 8*3; pipe_config->pipe_bpp = 8*3;
/* TV has it's own notion of sync and other mode flags, so clear them. */ pipe_config->port_clock = tv_mode->clock;
adjusted_mode->flags = 0;
intel_tv_mode_to_mode(adjusted_mode, tv_mode);
drm_mode_set_crtcinfo(adjusted_mode, 0);
if (intel_tv_source_too_wide(dev_priv, hdisplay) ||
!intel_tv_vert_scaling(adjusted_mode, conn_state, vdisplay)) {
int extra, top, bottom;
extra = adjusted_mode->crtc_vdisplay - vdisplay;
if (extra < 0) {
DRM_DEBUG_KMS("No vertical scaling for >1024 pixel wide modes\n");
return -EINVAL;
}
/* Need to turn off the vertical filter and center the image */
/* Attempt to maintain the relative sizes of the margins */
top = conn_state->tv.margins.top;
bottom = conn_state->tv.margins.bottom;
if (top + bottom)
top = extra * top / (top + bottom);
else
top = extra / 2;
bottom = extra - top;
tv_conn_state->margins.top = top;
tv_conn_state->margins.bottom = bottom;
tv_conn_state->bypass_vfilter = true;
if (!tv_mode->progressive) {
adjusted_mode->clock /= 2;
adjusted_mode->crtc_clock /= 2;
adjusted_mode->flags |= DRM_MODE_FLAG_INTERLACE;
}
} else {
tv_conn_state->margins.top = conn_state->tv.margins.top;
tv_conn_state->margins.bottom = conn_state->tv.margins.bottom;
tv_conn_state->bypass_vfilter = false;
}
DRM_DEBUG_KMS("TV mode:\n");
drm_mode_debug_printmodeline(adjusted_mode);
/* /*
* FIXME: We don't check whether the input mode is actually what we want * The pipe scanline counter behaviour looks as follows when
* or whether userspace is doing something stupid. * using the TV encoder:
*
* time ->
*
* dsl=vtotal-1 | |
* || ||
* ___| | ___| |
* / | / |
* / | / |
* dsl=0 ___/ |_____/ |
* | | | | | |
* ^ ^ ^ ^ ^
* | | | | pipe vblank/first part of tv vblank
* | | | bottom margin
* | | active
* | top margin
* remainder of tv vblank
*
* When the TV encoder is used the pipe wants to run faster
* than expected rate. During the active portion the TV
* encoder stalls the pipe every few lines to keep it in
* check. When the TV encoder reaches the bottom margin the
* pipe simply stops. Once we reach the TV vblank the pipe is
* no longer stalled and it runs at the max rate (apparently
* oversample clock on gen3, cdclk on gen4). Once the pipe
* reaches the pipe vtotal the pipe stops for the remainder
* of the TV vblank/top margin. The pipe starts up again when
* the TV encoder exits the top margin.
*
* To avoid huge hassles for vblank timestamping we scale
* the pipe timings as if the pipe always runs at the average
* rate it maintains during the active period. This also
* gives us a reasonable guesstimate as to the pixel rate.
* Due to the variation in the actual pipe speed the scanline
* counter will give us slightly erroneous results during the
* TV vblank/margins. But since vtotal was selected such that
* it matches the average rate of the pipe during the active
* portion the error shouldn't cause any serious grief to
* vblank timestamps.
*
* For posterity here is the empirically derived formula
* that gives us the maximum length of the pipe vblank
* we can use without causing display corruption. Following
* this would allow us to have a ticking scanline counter
* everywhere except during the bottom margin (there the
* pipe always stops). Ie. this would eliminate the second
* flat portion of the above graph. However this would also
* complicate vblank timestamping as the pipe vtotal would
* no longer match the average rate the pipe runs at during
* the active portion. Hence following this formula seems
* more trouble than it's worth.
*
* if (IS_GEN(dev_priv, 4)) {
* num = cdclk * (tv_mode->oversample >> !tv_mode->progressive);
* den = tv_mode->clock;
* } else {
* num = tv_mode->oversample >> !tv_mode->progressive;
* den = 1;
* }
* max_pipe_vblank_len ~=
* (num * tv_htotal * (tv_vblank_len + top_margin)) /
* (den * pipe_htotal);
*/ */
intel_tv_scale_mode_horiz(adjusted_mode, hdisplay,
conn_state->tv.margins.left,
conn_state->tv.margins.right);
intel_tv_scale_mode_vert(adjusted_mode, vdisplay,
tv_conn_state->margins.top,
tv_conn_state->margins.bottom);
drm_mode_set_crtcinfo(adjusted_mode, 0);
adjusted_mode->name[0] = '\0';
/* pixel counter doesn't work on i965gm TV output */
if (IS_I965GM(dev_priv))
adjusted_mode->private_flags |=
I915_MODE_FLAG_USE_SCANLINE_COUNTER;
return 0; return 0;
} }
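The margin redistribution in the vertical-centering branch above preserves the user's top/bottom ratio; with hypothetical numbers:

/*
 * extra = 60, user top = 10, bottom = 30:
 *   top    = 60 * 10 / (10 + 30) = 15
 *   bottom = 60 - 15             = 45   (the 1:3 split is kept)
 */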
@ -986,14 +1416,16 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder,
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc); struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc);
struct intel_tv *intel_tv = enc_to_tv(encoder); struct intel_tv *intel_tv = enc_to_tv(encoder);
const struct intel_tv_connector_state *tv_conn_state =
to_intel_tv_connector_state(conn_state);
const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state); const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state);
u32 tv_ctl; u32 tv_ctl, tv_filter_ctl;
u32 scctl1, scctl2, scctl3; u32 scctl1, scctl2, scctl3;
int i, j; int i, j;
const struct video_levels *video_levels; const struct video_levels *video_levels;
const struct color_conversion *color_conversion; const struct color_conversion *color_conversion;
bool burst_ena; bool burst_ena;
int xpos = 0x0, ypos = 0x0; int xpos, ypos;
unsigned int xsize, ysize; unsigned int xsize, ysize;
if (!tv_mode) if (!tv_mode)
@ -1029,7 +1461,21 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder,
} }
tv_ctl |= TV_ENC_PIPE_SEL(intel_crtc->pipe); tv_ctl |= TV_ENC_PIPE_SEL(intel_crtc->pipe);
tv_ctl |= tv_mode->oversample;
switch (tv_mode->oversample) {
case 8:
tv_ctl |= TV_OVERSAMPLE_8X;
break;
case 4:
tv_ctl |= TV_OVERSAMPLE_4X;
break;
case 2:
tv_ctl |= TV_OVERSAMPLE_2X;
break;
default:
tv_ctl |= TV_OVERSAMPLE_NONE;
break;
}
if (tv_mode->progressive) if (tv_mode->progressive)
tv_ctl |= TV_PROGRESSIVE; tv_ctl |= TV_PROGRESSIVE;
@ -1081,19 +1527,20 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder,
assert_pipe_disabled(dev_priv, intel_crtc->pipe); assert_pipe_disabled(dev_priv, intel_crtc->pipe);
/* Filter ctl must be set before TV_WIN_SIZE */ /* Filter ctl must be set before TV_WIN_SIZE */
I915_WRITE(TV_FILTER_CTL_1, TV_AUTO_SCALE); tv_filter_ctl = TV_AUTO_SCALE;
xsize = tv_mode->hblank_start - tv_mode->hblank_end; if (tv_conn_state->bypass_vfilter)
if (tv_mode->progressive) tv_filter_ctl |= TV_V_FILTER_BYPASS;
ysize = tv_mode->nbr_end + 1; I915_WRITE(TV_FILTER_CTL_1, tv_filter_ctl);
else
ysize = 2*tv_mode->nbr_end + 1;
xpos += conn_state->tv.margins.left; xsize = tv_mode->hblank_start - tv_mode->hblank_end;
ypos += conn_state->tv.margins.top; ysize = intel_tv_mode_vdisplay(tv_mode);
xpos = conn_state->tv.margins.left;
ypos = tv_conn_state->margins.top;
xsize -= (conn_state->tv.margins.left + xsize -= (conn_state->tv.margins.left +
conn_state->tv.margins.right); conn_state->tv.margins.right);
ysize -= (conn_state->tv.margins.top + ysize -= (tv_conn_state->margins.top +
conn_state->tv.margins.bottom); tv_conn_state->margins.bottom);
I915_WRITE(TV_WIN_POS, (xpos<<16)|ypos); I915_WRITE(TV_WIN_POS, (xpos<<16)|ypos);
I915_WRITE(TV_WIN_SIZE, (xsize<<16)|ysize); I915_WRITE(TV_WIN_SIZE, (xsize<<16)|ysize);
@ -1110,23 +1557,6 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder,
I915_WRITE(TV_CTL, tv_ctl); I915_WRITE(TV_CTL, tv_ctl);
} }
static const struct drm_display_mode reported_modes[] = {
{
.name = "NTSC 480i",
.clock = 107520,
.hdisplay = 1280,
.hsync_start = 1368,
.hsync_end = 1496,
.htotal = 1712,
.vdisplay = 1024,
.vsync_start = 1027,
.vsync_end = 1034,
.vtotal = 1104,
.type = DRM_MODE_TYPE_DRIVER,
},
};
static int static int
intel_tv_detect_type(struct intel_tv *intel_tv, intel_tv_detect_type(struct intel_tv *intel_tv,
struct drm_connector *connector) struct drm_connector *connector)
@ -1233,16 +1663,18 @@ static void intel_tv_find_better_format(struct drm_connector *connector)
const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state);
int i; int i;
if ((intel_tv->type == DRM_MODE_CONNECTOR_Component) == /* Component supports everything so we can keep the current mode */
tv_mode->component_only) if (intel_tv->type == DRM_MODE_CONNECTOR_Component)
return; return;
/* If the current mode is fine don't change it */
if (!tv_mode->component_only)
return;
for (i = 0; i < ARRAY_SIZE(tv_modes); i++) { for (i = 0; i < ARRAY_SIZE(tv_modes); i++) {
tv_mode = tv_modes + i; tv_mode = &tv_modes[i];
if ((intel_tv->type == DRM_MODE_CONNECTOR_Component) == if (!tv_mode->component_only)
tv_mode->component_only)
break; break;
} }
@ -1254,7 +1686,6 @@ intel_tv_detect(struct drm_connector *connector,
struct drm_modeset_acquire_ctx *ctx, struct drm_modeset_acquire_ctx *ctx,
bool force) bool force)
{ {
struct drm_display_mode mode;
struct intel_tv *intel_tv = intel_attached_tv(connector); struct intel_tv *intel_tv = intel_attached_tv(connector);
enum drm_connector_status status; enum drm_connector_status status;
int type; int type;
@ -1263,13 +1694,11 @@ intel_tv_detect(struct drm_connector *connector,
connector->base.id, connector->name, connector->base.id, connector->name,
force); force);
mode = reported_modes[0];
if (force) { if (force) {
struct intel_load_detect_pipe tmp; struct intel_load_detect_pipe tmp;
int ret; int ret;
ret = intel_get_load_detect_pipe(connector, &mode, &tmp, ctx); ret = intel_get_load_detect_pipe(connector, NULL, &tmp, ctx);
if (ret < 0) if (ret < 0)
return ret; return ret;
@ -1293,84 +1722,85 @@ intel_tv_detect(struct drm_connector *connector,
} }
static const struct input_res { static const struct input_res {
const char *name; u16 w, h;
int w, h;
} input_res_table[] = { } input_res_table[] = {
{"640x480", 640, 480}, { 640, 480 },
{"800x600", 800, 600}, { 800, 600 },
{"1024x768", 1024, 768}, { 1024, 768 },
{"1280x1024", 1280, 1024}, { 1280, 1024 },
{"848x480", 848, 480}, { 848, 480 },
{"1280x720", 1280, 720}, { 1280, 720 },
{"1920x1080", 1920, 1080}, { 1920, 1080 },
}; };
/* /* Choose preferred mode according to line number of TV format */
* Chose preferred mode according to line number of TV format static bool
*/ intel_tv_is_preferred_mode(const struct drm_display_mode *mode,
static void const struct tv_mode *tv_mode)
intel_tv_choose_preferred_modes(const struct tv_mode *tv_mode,
struct drm_display_mode *mode_ptr)
{ {
if (tv_mode->nbr_end < 480 && mode_ptr->vdisplay == 480) int vdisplay = intel_tv_mode_vdisplay(tv_mode);
mode_ptr->type |= DRM_MODE_TYPE_PREFERRED;
else if (tv_mode->nbr_end > 480) { /* prefer 480 line modes for all SD TV modes */
if (tv_mode->progressive == true && tv_mode->nbr_end < 720) { if (vdisplay <= 576)
if (mode_ptr->vdisplay == 720) vdisplay = 480;
mode_ptr->type |= DRM_MODE_TYPE_PREFERRED;
} else if (mode_ptr->vdisplay == 1080) return vdisplay == mode->vdisplay;
mode_ptr->type |= DRM_MODE_TYPE_PREFERRED;
} }
static void
intel_tv_set_mode_type(struct drm_display_mode *mode,
const struct tv_mode *tv_mode)
{
mode->type = DRM_MODE_TYPE_DRIVER;
if (intel_tv_is_preferred_mode(mode, tv_mode))
mode->type |= DRM_MODE_TYPE_PREFERRED;
} }
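Concretely, the preference rule maps the TV format's line count onto the input_res_table entries roughly as follows (illustrative summary; the SD figures assume the usual 480/576 line counts):

/*
 * <= 576 lines (480i/480p/576i/576p) -> the 480-line inputs (640x480, 848x480)
 * 720p                               -> 1280x720
 * 1080i/1080p                        -> 1920x1080
 */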
static int static int
intel_tv_get_modes(struct drm_connector *connector) intel_tv_get_modes(struct drm_connector *connector)
{ {
struct drm_display_mode *mode_ptr; struct drm_i915_private *dev_priv = to_i915(connector->dev);
const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state);
int j, count = 0; int i, count = 0;
u64 tmp;
for (j = 0; j < ARRAY_SIZE(input_res_table); for (i = 0; i < ARRAY_SIZE(input_res_table); i++) {
j++) { const struct input_res *input = &input_res_table[i];
const struct input_res *input = &input_res_table[j]; struct drm_display_mode *mode;
unsigned int hactive_s = input->w;
unsigned int vactive_s = input->h;
if (tv_mode->max_srcw && input->w > tv_mode->max_srcw) if (input->w > 1024 &&
!tv_mode->progressive &&
!tv_mode->component_only)
continue; continue;
if (input->w > 1024 && (!tv_mode->progressive /* no vertical scaling with wide sources on gen3 */
&& !tv_mode->component_only)) if (IS_GEN(dev_priv, 3) && input->w > 1024 &&
input->h > intel_tv_mode_vdisplay(tv_mode))
continue; continue;
mode_ptr = drm_mode_create(connector->dev); mode = drm_mode_create(connector->dev);
if (!mode_ptr) if (!mode)
continue; continue;
strlcpy(mode_ptr->name, input->name, DRM_DISPLAY_MODE_LEN);
mode_ptr->hdisplay = hactive_s; /*
mode_ptr->hsync_start = hactive_s + 1; * We take the TV mode and scale it to look
mode_ptr->hsync_end = hactive_s + 64; * like it had the expected h/vdisplay. This
if (mode_ptr->hsync_end <= mode_ptr->hsync_start) * provides the most information to userspace
mode_ptr->hsync_end = mode_ptr->hsync_start + 1; * about the actual timings of the mode. We
mode_ptr->htotal = hactive_s + 96; * do ignore the margins though.
*/
intel_tv_mode_to_mode(mode, tv_mode);
if (count == 0) {
DRM_DEBUG_KMS("TV mode:\n");
drm_mode_debug_printmodeline(mode);
}
intel_tv_scale_mode_horiz(mode, input->w, 0, 0);
intel_tv_scale_mode_vert(mode, input->h, 0, 0);
intel_tv_set_mode_type(mode, tv_mode);
mode_ptr->vdisplay = vactive_s; drm_mode_set_name(mode);
mode_ptr->vsync_start = vactive_s + 1;
mode_ptr->vsync_end = vactive_s + 32;
if (mode_ptr->vsync_end <= mode_ptr->vsync_start)
mode_ptr->vsync_end = mode_ptr->vsync_start + 1;
mode_ptr->vtotal = vactive_s + 33;
tmp = mul_u32_u32(tv_mode->refresh, mode_ptr->vtotal); drm_mode_probed_add(connector, mode);
tmp *= mode_ptr->htotal;
tmp = div_u64(tmp, 1000000);
mode_ptr->clock = (int) tmp;
mode_ptr->type = DRM_MODE_TYPE_DRIVER;
intel_tv_choose_preferred_modes(tv_mode, mode_ptr);
drm_mode_probed_add(connector, mode_ptr);
count++; count++;
} }
@ -1383,7 +1813,7 @@ static const struct drm_connector_funcs intel_tv_connector_funcs = {
.destroy = intel_connector_destroy, .destroy = intel_connector_destroy,
.fill_modes = drm_helper_probe_single_connector_modes, .fill_modes = drm_helper_probe_single_connector_modes,
.atomic_destroy_state = drm_atomic_helper_connector_destroy_state, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, .atomic_duplicate_state = intel_tv_connector_duplicate_state,
}; };
static int intel_tv_atomic_check(struct drm_connector *connector, static int intel_tv_atomic_check(struct drm_connector *connector,
@ -1530,11 +1960,15 @@ intel_tv_init(struct drm_i915_private *dev_priv)
connector->doublescan_allowed = false; connector->doublescan_allowed = false;
/* Create TV properties then attach current values */ /* Create TV properties then attach current values */
for (i = 0; i < ARRAY_SIZE(tv_modes); i++) for (i = 0; i < ARRAY_SIZE(tv_modes); i++) {
/* 1080p50/1080p60 not supported on gen3 */
if (IS_GEN(dev_priv, 3) &&
tv_modes[i].oversample == 1)
break;
tv_format_names[i] = tv_modes[i].name; tv_format_names[i] = tv_modes[i].name;
drm_mode_create_tv_properties(dev, }
ARRAY_SIZE(tv_modes), drm_mode_create_tv_properties(dev, i, tv_format_names);
tv_format_names);
drm_object_attach_property(&connector->base, dev->mode_config.tv_mode_property, drm_object_attach_property(&connector->base, dev->mode_config.tv_mode_property,
state->tv.mode); state->tv.mode);

View file

@ -142,7 +142,8 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
} }
static void static void
__wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
u32 val)
{ {
struct i915_wa wa = { struct i915_wa wa = {
.reg = reg, .reg = reg,
@ -153,16 +154,32 @@ __wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val)
_wa_add(wal, &wa); _wa_add(wal, &wa);
} }
#define WA_REG(addr, mask, val) __wa_add(wal, (addr), (mask), (val)) static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
}
static void
wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
wa_write_masked_or(wal, reg, ~0, val);
}
static void
wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
wa_write_masked_or(wal, reg, val, val);
}
#define WA_SET_BIT_MASKED(addr, mask) \ #define WA_SET_BIT_MASKED(addr, mask) \
WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))
#define WA_CLR_BIT_MASKED(addr, mask) \ #define WA_CLR_BIT_MASKED(addr, mask) \
WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask)) wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask))
#define WA_SET_FIELD_MASKED(addr, mask, value) \ #define WA_SET_FIELD_MASKED(addr, mask, value) \
WA_REG(addr, (mask), _MASKED_FIELD(mask, value)) wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))
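Since every wrapper now funnels into wa_write_masked_or(), the shorthand forms are simply the explicit call with the mask filled in; for example (illustrative, with a made-up register bit):

wa_write_or(wal, reg, BIT(5));                /* records mask = BIT(5), val = BIT(5) */
wa_write_masked_or(wal, reg, BIT(5), BIT(5)); /* the explicit equivalent */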
static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine) static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine)
{ {
@ -532,6 +549,12 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine)
if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC); GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);
/* WaEnableFloatBlendOptimization:icl */
wa_write_masked_or(wal,
GEN10_CACHE_MODE_SS,
0, /* write-only, so skip validation */
_MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));
} }
void intel_engine_init_ctx_wa(struct intel_engine_cs *engine) void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
@ -602,43 +625,6 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
return 0; return 0;
} }
static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
struct i915_wa wa = {
.reg = reg,
.mask = val,
.val = _MASKED_BIT_ENABLE(val)
};
_wa_add(wal, &wa);
}
static void
wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
u32 val)
{
struct i915_wa wa = {
.reg = reg,
.mask = mask,
.val = val
};
_wa_add(wal, &wa);
}
static void
wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
wa_write_masked_or(wal, reg, ~0, val);
}
static void
wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
wa_write_masked_or(wal, reg, val, val);
}
static void static void
gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{ {


@ -84,7 +84,7 @@ static int populate_ggtt(struct drm_i915_private *i915,
return -EINVAL; return -EINVAL;
} }
if (list_empty(&i915->ggtt.vm.inactive_list)) { if (list_empty(&i915->ggtt.vm.bound_list)) {
pr_err("No objects on the GGTT inactive list!\n"); pr_err("No objects on the GGTT inactive list!\n");
return -EINVAL; return -EINVAL;
} }
@ -94,11 +94,14 @@ static int populate_ggtt(struct drm_i915_private *i915,
static void unpin_ggtt(struct drm_i915_private *i915) static void unpin_ggtt(struct drm_i915_private *i915)
{ {
struct i915_ggtt *ggtt = &i915->ggtt;
struct i915_vma *vma; struct i915_vma *vma;
list_for_each_entry(vma, &i915->ggtt.vm.inactive_list, vm_link) mutex_lock(&ggtt->vm.mutex);
list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link)
if (vma->obj->mm.quirked) if (vma->obj->mm.quirked)
i915_vma_unpin(vma); i915_vma_unpin(vma);
mutex_unlock(&ggtt->vm.mutex);
} }
static void cleanup_objects(struct drm_i915_private *i915, static void cleanup_objects(struct drm_i915_private *i915,

View file

@ -1237,7 +1237,10 @@ static void track_vma_bind(struct i915_vma *vma)
__i915_gem_object_pin_pages(obj); __i915_gem_object_pin_pages(obj);
vma->pages = obj->mm.pages; vma->pages = obj->mm.pages;
list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
mutex_lock(&vma->vm->mutex);
list_move_tail(&vma->vm_link, &vma->vm->bound_list);
mutex_unlock(&vma->vm->mutex);
} }
static int exercise_mock(struct drm_i915_private *i915, static int exercise_mock(struct drm_i915_private *i915,


@ -13,6 +13,7 @@ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */
selftest(uncore, intel_uncore_live_selftests) selftest(uncore, intel_uncore_live_selftests)
selftest(workarounds, intel_workarounds_live_selftests) selftest(workarounds, intel_workarounds_live_selftests)
selftest(requests, i915_request_live_selftests) selftest(requests, i915_request_live_selftests)
selftest(timelines, i915_timeline_live_selftests)
selftest(objects, i915_gem_object_live_selftests) selftest(objects, i915_gem_object_live_selftests)
selftest(dmabuf, i915_gem_dmabuf_live_selftests) selftest(dmabuf, i915_gem_dmabuf_live_selftests)
selftest(coherency, i915_gem_coherency_live_selftests) selftest(coherency, i915_gem_coherency_live_selftests)


@ -15,8 +15,7 @@ selftest(scatterlist, scatterlist_mock_selftests)
selftest(syncmap, i915_syncmap_mock_selftests) selftest(syncmap, i915_syncmap_mock_selftests)
selftest(uncore, intel_uncore_mock_selftests) selftest(uncore, intel_uncore_mock_selftests)
selftest(engine, intel_engine_cs_mock_selftests) selftest(engine, intel_engine_cs_mock_selftests)
selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) selftest(timelines, i915_timeline_mock_selftests)
selftest(timelines, i915_gem_timeline_mock_selftests)
selftest(requests, i915_request_mock_selftests) selftest(requests, i915_request_mock_selftests)
selftest(objects, i915_gem_object_mock_selftests) selftest(objects, i915_gem_object_mock_selftests)
selftest(dmabuf, i915_gem_dmabuf_mock_selftests) selftest(dmabuf, i915_gem_dmabuf_mock_selftests)


@ -41,16 +41,35 @@ u64 i915_prandom_u64_state(struct rnd_state *rnd)
return x; return x;
} }
void i915_prandom_shuffle(void *arr, size_t elsz, size_t count,
struct rnd_state *state)
{
char stack[128];
if (WARN_ON(elsz > sizeof(stack) || count > U32_MAX))
return;
if (!elsz || !count)
return;
/* Fisher-Yates shuffle courtesy of Knuth */
while (--count) {
size_t swp;
swp = i915_prandom_u32_max_state(count + 1, state);
if (swp == count)
continue;
memcpy(stack, arr + count * elsz, elsz);
memcpy(arr + count * elsz, arr + swp * elsz, elsz);
memcpy(arr + swp * elsz, stack, elsz);
}
}
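Because the helper shuffles arbitrary fixed-size elements in place, i915_random_reorder() below becomes a one-line wrapper; a hypothetical caller shuffling a table of test vectors (struct and array names invented for illustration) would look like:

struct vec { u32 offset, len; };
struct vec table[64];
I915_RND_STATE(prng);

i915_prandom_shuffle(table, sizeof(table[0]), ARRAY_SIZE(table), &prng);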
void i915_random_reorder(unsigned int *order, unsigned int count, void i915_random_reorder(unsigned int *order, unsigned int count,
struct rnd_state *state) struct rnd_state *state)
{ {
unsigned int i, j; i915_prandom_shuffle(order, sizeof(*order), count, state);
for (i = 0; i < count; i++) {
BUILD_BUG_ON(sizeof(unsigned int) > sizeof(u32));
j = i915_prandom_u32_max_state(count, state);
swap(order[i], order[j]);
}
} }
unsigned int *i915_random_order(unsigned int count, struct rnd_state *state) unsigned int *i915_random_order(unsigned int count, struct rnd_state *state)


@ -54,4 +54,7 @@ void i915_random_reorder(unsigned int *order,
unsigned int count, unsigned int count,
struct rnd_state *state); struct rnd_state *state);
void i915_prandom_shuffle(void *arr, size_t elsz, size_t count,
struct rnd_state *state);
#endif /* !__I915_SELFTESTS_RANDOM_H__ */ #endif /* !__I915_SELFTESTS_RANDOM_H__ */


@ -25,9 +25,12 @@
#include <linux/prime_numbers.h> #include <linux/prime_numbers.h>
#include "../i915_selftest.h" #include "../i915_selftest.h"
#include "i915_random.h"
#include "igt_live_test.h" #include "igt_live_test.h"
#include "lib_sw_fence.h"
#include "mock_context.h" #include "mock_context.h"
#include "mock_drm.h"
#include "mock_gem_device.h" #include "mock_gem_device.h"
static int igt_add_request(void *arg) static int igt_add_request(void *arg)
@ -247,6 +250,254 @@ err_context_0:
return err; return err;
} }
struct smoketest {
struct intel_engine_cs *engine;
struct i915_gem_context **contexts;
atomic_long_t num_waits, num_fences;
int ncontexts, max_batch;
struct i915_request *(*request_alloc)(struct i915_gem_context *,
struct intel_engine_cs *);
};
static struct i915_request *
__mock_request_alloc(struct i915_gem_context *ctx,
struct intel_engine_cs *engine)
{
return mock_request(engine, ctx, 0);
}
static struct i915_request *
__live_request_alloc(struct i915_gem_context *ctx,
struct intel_engine_cs *engine)
{
return i915_request_alloc(engine, ctx);
}
static int __igt_breadcrumbs_smoketest(void *arg)
{
struct smoketest *t = arg;
struct mutex * const BKL = &t->engine->i915->drm.struct_mutex;
const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
const unsigned int total = 4 * t->ncontexts + 1;
unsigned int num_waits = 0, num_fences = 0;
struct i915_request **requests;
I915_RND_STATE(prng);
unsigned int *order;
int err = 0;
/*
* A very simple test to catch the most egregious of list handling bugs.
*
* At its heart, we simply create oodles of requests running across
* multiple kthreads and enable signaling on them, for the sole purpose
* of stressing our breadcrumb handling. The only inspection we do is
* that the fences were marked as signaled.
*/
requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL);
if (!requests)
return -ENOMEM;
order = i915_random_order(total, &prng);
if (!order) {
err = -ENOMEM;
goto out_requests;
}
while (!kthread_should_stop()) {
struct i915_sw_fence *submit, *wait;
unsigned int n, count;
submit = heap_fence_create(GFP_KERNEL);
if (!submit) {
err = -ENOMEM;
break;
}
wait = heap_fence_create(GFP_KERNEL);
if (!wait) {
i915_sw_fence_commit(submit);
heap_fence_put(submit);
err = -ENOMEM;
break;
}
i915_random_reorder(order, total, &prng);
count = 1 + i915_prandom_u32_max_state(max_batch, &prng);
for (n = 0; n < count; n++) {
struct i915_gem_context *ctx =
t->contexts[order[n] % t->ncontexts];
struct i915_request *rq;
mutex_lock(BKL);
rq = t->request_alloc(ctx, t->engine);
if (IS_ERR(rq)) {
mutex_unlock(BKL);
err = PTR_ERR(rq);
count = n;
break;
}
err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
submit,
GFP_KERNEL);
requests[n] = i915_request_get(rq);
i915_request_add(rq);
mutex_unlock(BKL);
if (err >= 0)
err = i915_sw_fence_await_dma_fence(wait,
&rq->fence,
0,
GFP_KERNEL);
if (err < 0) {
i915_request_put(rq);
count = n;
break;
}
}
i915_sw_fence_commit(submit);
i915_sw_fence_commit(wait);
if (!wait_event_timeout(wait->wait,
i915_sw_fence_done(wait),
HZ / 2)) {
struct i915_request *rq = requests[count - 1];
pr_err("waiting for %d fences (last %llx:%lld) on %s timed out!\n",
count,
rq->fence.context, rq->fence.seqno,
t->engine->name);
i915_gem_set_wedged(t->engine->i915);
GEM_BUG_ON(!i915_request_completed(rq));
i915_sw_fence_wait(wait);
err = -EIO;
}
for (n = 0; n < count; n++) {
struct i915_request *rq = requests[n];
if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
&rq->fence.flags)) {
pr_err("%llu:%llu was not signaled!\n",
rq->fence.context, rq->fence.seqno);
err = -EINVAL;
}
i915_request_put(rq);
}
heap_fence_put(wait);
heap_fence_put(submit);
if (err < 0)
break;
num_fences += count;
num_waits++;
cond_resched();
}
atomic_long_add(num_fences, &t->num_fences);
atomic_long_add(num_waits, &t->num_waits);
kfree(order);
out_requests:
kfree(requests);
return err;
}
static int mock_breadcrumbs_smoketest(void *arg)
{
struct drm_i915_private *i915 = arg;
struct smoketest t = {
.engine = i915->engine[RCS],
.ncontexts = 1024,
.max_batch = 1024,
.request_alloc = __mock_request_alloc
};
unsigned int ncpus = num_online_cpus();
struct task_struct **threads;
unsigned int n;
int ret = 0;
/*
* Smoketest our breadcrumb/signal handling for requests across multiple
* threads. A very simple test to only catch the most egregious of bugs.
* See __igt_breadcrumbs_smoketest();
*/
threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
if (!threads)
return -ENOMEM;
t.contexts =
kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
if (!t.contexts) {
ret = -ENOMEM;
goto out_threads;
}
mutex_lock(&t.engine->i915->drm.struct_mutex);
for (n = 0; n < t.ncontexts; n++) {
t.contexts[n] = mock_context(t.engine->i915, "mock");
if (!t.contexts[n]) {
ret = -ENOMEM;
goto out_contexts;
}
}
mutex_unlock(&t.engine->i915->drm.struct_mutex);
for (n = 0; n < ncpus; n++) {
threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
&t, "igt/%d", n);
if (IS_ERR(threads[n])) {
ret = PTR_ERR(threads[n]);
ncpus = n;
break;
}
get_task_struct(threads[n]);
}
msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
for (n = 0; n < ncpus; n++) {
int err;
err = kthread_stop(threads[n]);
if (err < 0 && !ret)
ret = err;
put_task_struct(threads[n]);
}
pr_info("Completed %lu waits for %lu fence across %d cpus\n",
atomic_long_read(&t.num_waits),
atomic_long_read(&t.num_fences),
ncpus);
mutex_lock(&t.engine->i915->drm.struct_mutex);
out_contexts:
for (n = 0; n < t.ncontexts; n++) {
if (!t.contexts[n])
break;
mock_context_close(t.contexts[n]);
}
mutex_unlock(&t.engine->i915->drm.struct_mutex);
kfree(t.contexts);
out_threads:
kfree(threads);
return ret;
}
int i915_request_mock_selftests(void)
{
static const struct i915_subtest tests[] = {
@ -254,6 +505,7 @@ int i915_request_mock_selftests(void)
SUBTEST(igt_wait_request),
SUBTEST(igt_fence_wait),
SUBTEST(igt_request_rewind),
SUBTEST(mock_breadcrumbs_smoketest),
};
struct drm_i915_private *i915;
intel_wakeref_t wakeref;
@ -812,6 +1064,178 @@ out_unlock:
return err;
}
static int
max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
{
struct i915_request *rq;
int ret;
/*
* Before execlists, all contexts share the same ringbuffer. With
* execlists, each context/engine has a separate ringbuffer and
* for the purposes of this test, inexhaustible.
*
* For the global ringbuffer though, we have to be very careful
* that we do not wrap while preventing the execution of requests
* with an unsignaled fence.
*/
if (HAS_EXECLISTS(ctx->i915))
return INT_MAX;
rq = i915_request_alloc(engine, ctx);
if (IS_ERR(rq)) {
ret = PTR_ERR(rq);
} else {
int sz;
ret = rq->ring->size - rq->reserved_space;
i915_request_add(rq);
sz = rq->ring->emit - rq->head;
if (sz < 0)
sz += rq->ring->size;
ret /= sz;
ret /= 2; /* leave half spare, in case of emergency! */
}
return ret;
}
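As a rough worked example of the division above (all numbers invented for illustration): on a legacy ring of 16384 bytes with 160 bytes of reserved space, a request that emits about 256 bytes of commands gives (16384 - 160) / 256 = 63, halved to 31, so max_batches() would cap each client at roughly 31 outstanding requests before the shared ring risks wrapping.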
static int live_breadcrumbs_smoketest(void *arg)
{
struct drm_i915_private *i915 = arg;
struct smoketest t[I915_NUM_ENGINES];
unsigned int ncpus = num_online_cpus();
unsigned long num_waits, num_fences;
struct intel_engine_cs *engine;
struct task_struct **threads;
struct igt_live_test live;
enum intel_engine_id id;
intel_wakeref_t wakeref;
struct drm_file *file;
unsigned int n;
int ret = 0;
/*
* Smoketest our breadcrumb/signal handling for requests across multiple
* threads. A very simple test to only catch the most egregious of bugs.
* See __igt_breadcrumbs_smoketest();
*
* On real hardware this time.
*/
wakeref = intel_runtime_pm_get(i915);
file = mock_file(i915);
if (IS_ERR(file)) {
ret = PTR_ERR(file);
goto out_rpm;
}
threads = kcalloc(ncpus * I915_NUM_ENGINES,
sizeof(*threads),
GFP_KERNEL);
if (!threads) {
ret = -ENOMEM;
goto out_file;
}
memset(&t[0], 0, sizeof(t[0]));
t[0].request_alloc = __live_request_alloc;
t[0].ncontexts = 64;
t[0].contexts = kmalloc_array(t[0].ncontexts,
sizeof(*t[0].contexts),
GFP_KERNEL);
if (!t[0].contexts) {
ret = -ENOMEM;
goto out_threads;
}
mutex_lock(&i915->drm.struct_mutex);
for (n = 0; n < t[0].ncontexts; n++) {
t[0].contexts[n] = live_context(i915, file);
if (!t[0].contexts[n]) {
ret = -ENOMEM;
goto out_contexts;
}
}
ret = igt_live_test_begin(&live, i915, __func__, "");
if (ret)
goto out_contexts;
for_each_engine(engine, i915, id) {
t[id] = t[0];
t[id].engine = engine;
t[id].max_batch = max_batches(t[0].contexts[0], engine);
if (t[id].max_batch < 0) {
ret = t[id].max_batch;
mutex_unlock(&i915->drm.struct_mutex);
goto out_flush;
}
/* One ring interleaved between requests from all cpus */
t[id].max_batch /= num_online_cpus() + 1;
pr_debug("Limiting batches to %d requests on %s\n",
t[id].max_batch, engine->name);
for (n = 0; n < ncpus; n++) {
struct task_struct *tsk;
tsk = kthread_run(__igt_breadcrumbs_smoketest,
&t[id], "igt/%d.%d", id, n);
if (IS_ERR(tsk)) {
ret = PTR_ERR(tsk);
mutex_unlock(&i915->drm.struct_mutex);
goto out_flush;
}
get_task_struct(tsk);
threads[id * ncpus + n] = tsk;
}
}
mutex_unlock(&i915->drm.struct_mutex);
msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
out_flush:
num_waits = 0;
num_fences = 0;
for_each_engine(engine, i915, id) {
for (n = 0; n < ncpus; n++) {
struct task_struct *tsk = threads[id * ncpus + n];
int err;
if (!tsk)
continue;
err = kthread_stop(tsk);
if (err < 0 && !ret)
ret = err;
put_task_struct(tsk);
}
num_waits += atomic_long_read(&t[id].num_waits);
num_fences += atomic_long_read(&t[id].num_fences);
}
pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
num_waits, num_fences, RUNTIME_INFO(i915)->num_rings, ncpus);
mutex_lock(&i915->drm.struct_mutex);
ret = igt_live_test_end(&live) ?: ret;
out_contexts:
mutex_unlock(&i915->drm.struct_mutex);
kfree(t[0].contexts);
out_threads:
kfree(threads);
out_file:
mock_file_free(i915, file);
out_rpm:
intel_runtime_pm_put(i915, wakeref);
return ret;
}
int i915_request_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
@ -819,6 +1243,7 @@ int i915_request_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_all_engines),
SUBTEST(live_sequential_engines),
SUBTEST(live_empty_request),
SUBTEST(live_breadcrumbs_smoketest),
};
if (i915_terminally_wedged(&i915->gpu_error))


@ -197,6 +197,49 @@ int i915_live_selftests(struct pci_dev *pdev)
return 0;
}
static bool apply_subtest_filter(const char *caller, const char *name)
{
char *filter, *sep, *tok;
bool result = true;
filter = kstrdup(i915_selftest.filter, GFP_KERNEL);
for (sep = filter; (tok = strsep(&sep, ","));) {
bool allow = true;
char *sl;
if (*tok == '!') {
allow = false;
tok++;
}
if (*tok == '\0')
continue;
sl = strchr(tok, '/');
if (sl) {
*sl++ = '\0';
if (strcmp(tok, caller)) {
if (allow)
result = false;
continue;
}
tok = sl;
}
if (strcmp(tok, name)) {
if (allow)
result = false;
continue;
}
result = allow;
break;
}
kfree(filter);
return result;
}
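To make the token rules above concrete, here is a small userspace re-implementation of the same matching logic (illustrative only, not part of the patch; it mirrors apply_subtest_filter() so the st_filter syntax can be exercised outside the kernel):

	#define _DEFAULT_SOURCE
	#include <stdbool.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	/* Same rules as apply_subtest_filter(): comma-separated tokens, an optional
	 * "caller/" prefix scopes a token to one selftest group, a leading '!'
	 * negates it; the first matching token decides, and a non-matching
	 * positive token excludes the subtest.
	 */
	static bool subtest_filter(const char *filter, const char *caller, const char *name)
	{
		char *copy = strdup(filter), *sep = copy, *tok;
		bool result = true;

		while ((tok = strsep(&sep, ","))) {
			bool allow = true;
			char *sl;

			if (*tok == '!') {
				allow = false;
				tok++;
			}
			if (*tok == '\0')
				continue;

			sl = strchr(tok, '/');
			if (sl) {
				*sl++ = '\0';
				if (strcmp(tok, caller)) {
					if (allow)
						result = false;
					continue;
				}
				tok = sl;
			}

			if (strcmp(tok, name)) {
				if (allow)
					result = false;
				continue;
			}

			result = allow;
			break;
		}

		free(copy);
		return result;
	}

	int main(void)
	{
		const char *f = "i915_timeline_mock_selftests/igt_sync,!bench_sync";

		/* prints 1 0 0: only igt_sync of that group survives the filter */
		printf("%d %d %d\n",
		       subtest_filter(f, "i915_timeline_mock_selftests", "igt_sync"),
		       subtest_filter(f, "i915_timeline_mock_selftests", "bench_sync"),
		       subtest_filter(f, "i915_timeline_mock_selftests", "mock_hwsp_freelist"));
		return 0;
	}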
int __i915_subtests(const char *caller,
const struct i915_subtest *st,
unsigned int count,
@ -209,6 +252,9 @@ int __i915_subtests(const char *caller,
if (signal_pending(current))
return -EINTR;
if (!apply_subtest_filter(caller, st->name))
continue;
pr_debug(DRIVER_NAME ": Running %s/%s\n", caller, st->name);
GEM_TRACE("Running %s/%s\n", caller, st->name);
@ -244,6 +290,7 @@ bool __igt_timeout(unsigned long timeout, const char *fmt, ...)
module_param_named(st_random_seed, i915_selftest.random_seed, uint, 0400);
module_param_named(st_timeout, i915_selftest.timeout_ms, uint, 0400);
module_param_named(st_filter, i915_selftest.filter, charp, 0400);
module_param_named_unsafe(mock_selftests, i915_selftest.mock, int, 0400);
MODULE_PARM_DESC(mock_selftests, "Run selftests before loading, using mock hardware (0:disabled [default], 1:run tests then load driver, -1:run tests then exit module)");


@ -4,12 +4,155 @@
* Copyright © 2017-2018 Intel Corporation
*/
#include <linux/prime_numbers.h>
#include "../i915_selftest.h"
#include "i915_random.h"
#include "igt_flush_test.h"
#include "mock_gem_device.h"
#include "mock_timeline.h"
static struct page *hwsp_page(struct i915_timeline *tl)
{
struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
return sg_page(obj->mm.pages->sgl);
}
static unsigned long hwsp_cacheline(struct i915_timeline *tl)
{
unsigned long address = (unsigned long)page_address(hwsp_page(tl));
return (address + tl->hwsp_offset) / CACHELINE_BYTES;
}
#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES)
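As a quick sanity check on the arithmetic (assuming the common 4096-byte page and 64-byte cacheline; both are configuration-dependent constants):

	/* CACHELINES_PER_PAGE = 4096 / 64 = 64 candidate HWSP slots per page */
	/* a timeline with hwsp_offset == 0x1c0 occupies slot 0x1c0 / 64 == 7 of its page */
	/* page_address() is page aligned, so hwsp_cacheline() keys from different pages never collide */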
struct mock_hwsp_freelist {
struct drm_i915_private *i915;
struct radix_tree_root cachelines;
struct i915_timeline **history;
unsigned long count, max;
struct rnd_state prng;
};
enum {
SHUFFLE = BIT(0),
};
static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
unsigned int idx,
struct i915_timeline *tl)
{
tl = xchg(&state->history[idx], tl);
if (tl) {
radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
i915_timeline_put(tl);
}
}
static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
unsigned int count,
unsigned int flags)
{
struct i915_timeline *tl;
unsigned int idx;
while (count--) {
unsigned long cacheline;
int err;
tl = i915_timeline_create(state->i915, "mock", NULL);
if (IS_ERR(tl))
return PTR_ERR(tl);
cacheline = hwsp_cacheline(tl);
err = radix_tree_insert(&state->cachelines, cacheline, tl);
if (err) {
if (err == -EEXIST) {
pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
cacheline);
}
i915_timeline_put(tl);
return err;
}
idx = state->count++ % state->max;
__mock_hwsp_record(state, idx, tl);
}
if (flags & SHUFFLE)
i915_prandom_shuffle(state->history,
sizeof(*state->history),
min(state->count, state->max),
&state->prng);
count = i915_prandom_u32_max_state(min(state->count, state->max),
&state->prng);
while (count--) {
idx = --state->count % state->max;
__mock_hwsp_record(state, idx, NULL);
}
return 0;
}
static int mock_hwsp_freelist(void *arg)
{
struct mock_hwsp_freelist state;
const struct {
const char *name;
unsigned int flags;
} phases[] = {
{ "linear", 0 },
{ "shuffled", SHUFFLE },
{ },
}, *p;
unsigned int na;
int err = 0;
INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);
state.i915 = mock_gem_device();
if (!state.i915)
return -ENOMEM;
/*
* Create a bunch of timelines and check that their HWSP do not overlap.
* Free some, and try again.
*/
state.max = PAGE_SIZE / sizeof(*state.history);
state.count = 0;
state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
if (!state.history) {
err = -ENOMEM;
goto err_put;
}
mutex_lock(&state.i915->drm.struct_mutex);
for (p = phases; p->name; p++) {
pr_debug("%s(%s)\n", __func__, p->name);
for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
err = __mock_hwsp_timeline(&state, na, p->flags);
if (err)
goto out;
}
}
out:
for (na = 0; na < state.max; na++)
__mock_hwsp_record(&state, na, NULL);
mutex_unlock(&state.i915->drm.struct_mutex);
kfree(state.history);
err_put:
drm_dev_put(&state.i915->drm);
return err;
}
struct __igt_sync {
const char *name;
u32 seqno;
@ -256,12 +399,331 @@ static int bench_sync(void *arg)
return 0;
}
int i915_gem_timeline_mock_selftests(void) int i915_timeline_mock_selftests(void)
{
static const struct i915_subtest tests[] = {
SUBTEST(mock_hwsp_freelist),
SUBTEST(igt_sync),
SUBTEST(bench_sync),
};
return i915_subtests(tests, NULL);
}
static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
{
u32 *cs;
cs = intel_ring_begin(rq, 4);
if (IS_ERR(cs))
return PTR_ERR(cs);
if (INTEL_GEN(rq->i915) >= 8) {
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = addr;
*cs++ = 0;
*cs++ = value;
} else if (INTEL_GEN(rq->i915) >= 4) {
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = 0;
*cs++ = addr;
*cs++ = value;
} else {
*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
*cs++ = addr;
*cs++ = value;
*cs++ = MI_NOOP;
}
intel_ring_advance(rq, cs);
return 0;
}
static struct i915_request *
tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, u32 value)
{
struct i915_request *rq;
int err;
lockdep_assert_held(&tl->i915->drm.struct_mutex); /* lazy rq refs */
err = i915_timeline_pin(tl);
if (err) {
rq = ERR_PTR(err);
goto out;
}
rq = i915_request_alloc(engine, engine->i915->kernel_context);
if (IS_ERR(rq))
goto out_unpin;
err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
i915_request_add(rq);
if (err)
rq = ERR_PTR(err);
out_unpin:
i915_timeline_unpin(tl);
out:
if (IS_ERR(rq))
pr_err("Failed to write to timeline!\n");
return rq;
}
static struct i915_timeline *
checked_i915_timeline_create(struct drm_i915_private *i915)
{
struct i915_timeline *tl;
tl = i915_timeline_create(i915, "live", NULL);
if (IS_ERR(tl))
return tl;
if (*tl->hwsp_seqno != tl->seqno) {
pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
*tl->hwsp_seqno, tl->seqno);
i915_timeline_put(tl);
return ERR_PTR(-EINVAL);
}
return tl;
}
static int live_hwsp_engine(void *arg)
{
#define NUM_TIMELINES 4096
struct drm_i915_private *i915 = arg;
struct i915_timeline **timelines;
struct intel_engine_cs *engine;
enum intel_engine_id id;
intel_wakeref_t wakeref;
unsigned long count, n;
int err = 0;
/*
* Create a bunch of timelines and check we can write
* independently to each of their breadcrumb slots.
*/
timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
sizeof(*timelines),
GFP_KERNEL);
if (!timelines)
return -ENOMEM;
mutex_lock(&i915->drm.struct_mutex);
wakeref = intel_runtime_pm_get(i915);
count = 0;
for_each_engine(engine, i915, id) {
if (!intel_engine_can_store_dword(engine))
continue;
for (n = 0; n < NUM_TIMELINES; n++) {
struct i915_timeline *tl;
struct i915_request *rq;
tl = checked_i915_timeline_create(i915);
if (IS_ERR(tl)) {
err = PTR_ERR(tl);
goto out;
}
rq = tl_write(tl, engine, count);
if (IS_ERR(rq)) {
i915_timeline_put(tl);
err = PTR_ERR(rq);
goto out;
}
timelines[count++] = tl;
}
}
out:
if (igt_flush_test(i915, I915_WAIT_LOCKED))
err = -EIO;
for (n = 0; n < count; n++) {
struct i915_timeline *tl = timelines[n];
if (!err && *tl->hwsp_seqno != n) {
pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
n, *tl->hwsp_seqno);
err = -EINVAL;
}
i915_timeline_put(tl);
}
intel_runtime_pm_put(i915, wakeref);
mutex_unlock(&i915->drm.struct_mutex);
kvfree(timelines);
return err;
#undef NUM_TIMELINES
}
static int live_hwsp_alternate(void *arg)
{
#define NUM_TIMELINES 4096
struct drm_i915_private *i915 = arg;
struct i915_timeline **timelines;
struct intel_engine_cs *engine;
enum intel_engine_id id;
intel_wakeref_t wakeref;
unsigned long count, n;
int err = 0;
/*
* Create a bunch of timelines and check we can write
* independently to each of their breadcrumb slots with adjacent
* engines.
*/
timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
sizeof(*timelines),
GFP_KERNEL);
if (!timelines)
return -ENOMEM;
mutex_lock(&i915->drm.struct_mutex);
wakeref = intel_runtime_pm_get(i915);
count = 0;
for (n = 0; n < NUM_TIMELINES; n++) {
for_each_engine(engine, i915, id) {
struct i915_timeline *tl;
struct i915_request *rq;
if (!intel_engine_can_store_dword(engine))
continue;
tl = checked_i915_timeline_create(i915);
if (IS_ERR(tl)) {
err = PTR_ERR(tl);
goto out;
}
rq = tl_write(tl, engine, count);
if (IS_ERR(rq)) {
i915_timeline_put(tl);
err = PTR_ERR(rq);
goto out;
}
timelines[count++] = tl;
}
}
out:
if (igt_flush_test(i915, I915_WAIT_LOCKED))
err = -EIO;
for (n = 0; n < count; n++) {
struct i915_timeline *tl = timelines[n];
if (!err && *tl->hwsp_seqno != n) {
pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
n, *tl->hwsp_seqno);
err = -EINVAL;
}
i915_timeline_put(tl);
}
intel_runtime_pm_put(i915, wakeref);
mutex_unlock(&i915->drm.struct_mutex);
kvfree(timelines);
return err;
#undef NUM_TIMELINES
}
static int live_hwsp_recycle(void *arg)
{
struct drm_i915_private *i915 = arg;
struct intel_engine_cs *engine;
enum intel_engine_id id;
intel_wakeref_t wakeref;
unsigned long count;
int err = 0;
/*
* Check seqno writes into one timeline at a time. We expect to
* recycle the breadcrumb slot between iterations, and we want to confuse
* neither ourselves nor the GPU.
*/
mutex_lock(&i915->drm.struct_mutex);
wakeref = intel_runtime_pm_get(i915);
count = 0;
for_each_engine(engine, i915, id) {
IGT_TIMEOUT(end_time);
if (!intel_engine_can_store_dword(engine))
continue;
do {
struct i915_timeline *tl;
struct i915_request *rq;
tl = checked_i915_timeline_create(i915);
if (IS_ERR(tl)) {
err = PTR_ERR(tl);
goto out;
}
rq = tl_write(tl, engine, count);
if (IS_ERR(rq)) {
i915_timeline_put(tl);
err = PTR_ERR(rq);
goto out;
}
if (i915_request_wait(rq,
I915_WAIT_LOCKED,
HZ / 5) < 0) {
pr_err("Wait for timeline writes timed out!\n");
i915_timeline_put(tl);
err = -EIO;
goto out;
}
if (*tl->hwsp_seqno != count) {
pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
count, *tl->hwsp_seqno);
err = -EINVAL;
}
i915_timeline_put(tl);
count++;
if (err)
goto out;
i915_timelines_park(i915); /* Encourage recycling! */
} while (!__igt_timeout(end_time, NULL));
}
out:
if (igt_flush_test(i915, I915_WAIT_LOCKED))
err = -EIO;
intel_runtime_pm_put(i915, wakeref);
mutex_unlock(&i915->drm.struct_mutex);
return err;
}
int i915_timeline_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(live_hwsp_recycle),
SUBTEST(live_hwsp_engine),
SUBTEST(live_hwsp_alternate),
};
return i915_subtests(tests, i915);
}


@ -672,7 +672,7 @@ static int igt_vma_partial(void *arg)
}
count = 0;
list_for_each_entry(vma, &obj->vma_list, obj_link) list_for_each_entry(vma, &obj->vma.list, obj_link)
count++;
if (count != nvma) {
pr_err("(%s) All partial vma were not recorded on the obj->vma_list: found %u, expected %u\n",
@ -701,7 +701,7 @@ static int igt_vma_partial(void *arg)
i915_vma_unpin(vma);
count = 0;
list_for_each_entry(vma, &obj->vma_list, obj_link) list_for_each_entry(vma, &obj->vma.list, obj_link)
count++;
if (count != nvma) {
pr_err("(%s) allocated an extra full vma!\n", p->name);


@ -35,7 +35,6 @@ int igt_live_test_begin(struct igt_live_test *t,
return err;
}
i915->gpu_error.missed_irq_rings = 0;
t->reset_global = i915_reset_count(&i915->gpu_error);
for_each_engine(engine, i915, id)
@ -75,11 +74,5 @@ int igt_live_test_end(struct igt_live_test *t)
return -EIO;
}
if (i915->gpu_error.missed_irq_rings) {
pr_err("%s(%s): Missed interrupts on engines %lx\n",
t->func, t->name, i915->gpu_error.missed_irq_rings);
return -EIO;
}
return 0;
}


@ -185,11 +185,6 @@ void igt_spinner_fini(struct igt_spinner *spin)
bool igt_wait_for_spinner(struct igt_spinner *spin, struct i915_request *rq)
{
if (!wait_event_timeout(rq->execute,
READ_ONCE(rq->global_seqno),
msecs_to_jiffies(10)))
return false;
return !(wait_for_us(i915_seqno_passed(hws_seqno(spin, rq),
rq->fence.seqno),
10) &&


@ -1,470 +0,0 @@
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#include "../i915_selftest.h"
#include "i915_random.h"
#include "mock_gem_device.h"
#include "mock_engine.h"
static int check_rbtree(struct intel_engine_cs *engine,
const unsigned long *bitmap,
const struct intel_wait *waiters,
const int count)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
struct rb_node *rb;
int n;
if (&b->irq_wait->node != rb_first(&b->waiters)) {
pr_err("First waiter does not match first element of wait-tree\n");
return -EINVAL;
}
n = find_first_bit(bitmap, count);
for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
struct intel_wait *w = container_of(rb, typeof(*w), node);
int idx = w - waiters;
if (!test_bit(idx, bitmap)) {
pr_err("waiter[%d, seqno=%d] removed but still in wait-tree\n",
idx, w->seqno);
return -EINVAL;
}
if (n != idx) {
pr_err("waiter[%d, seqno=%d] does not match expected next element in tree [%d]\n",
idx, w->seqno, n);
return -EINVAL;
}
n = find_next_bit(bitmap, count, n + 1);
}
return 0;
}
static int check_completion(struct intel_engine_cs *engine,
const unsigned long *bitmap,
const struct intel_wait *waiters,
const int count)
{
int n;
for (n = 0; n < count; n++) {
if (intel_wait_complete(&waiters[n]) != !!test_bit(n, bitmap))
continue;
pr_err("waiter[%d, seqno=%d] is %s, but expected %s\n",
n, waiters[n].seqno,
intel_wait_complete(&waiters[n]) ? "complete" : "active",
test_bit(n, bitmap) ? "active" : "complete");
return -EINVAL;
}
return 0;
}
static int check_rbtree_empty(struct intel_engine_cs *engine)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
if (b->irq_wait) {
pr_err("Empty breadcrumbs still has a waiter\n");
return -EINVAL;
}
if (!RB_EMPTY_ROOT(&b->waiters)) {
pr_err("Empty breadcrumbs, but wait-tree not empty\n");
return -EINVAL;
}
return 0;
}
static int igt_random_insert_remove(void *arg)
{
const u32 seqno_bias = 0x1000;
I915_RND_STATE(prng);
struct intel_engine_cs *engine = arg;
struct intel_wait *waiters;
const int count = 4096;
unsigned int *order;
unsigned long *bitmap;
int err = -ENOMEM;
int n;
mock_engine_reset(engine);
waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL);
if (!waiters)
goto out_engines;
bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap),
GFP_KERNEL);
if (!bitmap)
goto out_waiters;
order = i915_random_order(count, &prng);
if (!order)
goto out_bitmap;
for (n = 0; n < count; n++)
intel_wait_init_for_seqno(&waiters[n], seqno_bias + n);
err = check_rbtree(engine, bitmap, waiters, count);
if (err)
goto out_order;
/* Add and remove waiters into the rbtree in random order. At each
* step, we verify that the rbtree is correctly ordered.
*/
for (n = 0; n < count; n++) {
int i = order[n];
intel_engine_add_wait(engine, &waiters[i]);
__set_bit(i, bitmap);
err = check_rbtree(engine, bitmap, waiters, count);
if (err)
goto out_order;
}
i915_random_reorder(order, count, &prng);
for (n = 0; n < count; n++) {
int i = order[n];
intel_engine_remove_wait(engine, &waiters[i]);
__clear_bit(i, bitmap);
err = check_rbtree(engine, bitmap, waiters, count);
if (err)
goto out_order;
}
err = check_rbtree_empty(engine);
out_order:
kfree(order);
out_bitmap:
kfree(bitmap);
out_waiters:
kvfree(waiters);
out_engines:
mock_engine_flush(engine);
return err;
}
static int igt_insert_complete(void *arg)
{
const u32 seqno_bias = 0x1000;
struct intel_engine_cs *engine = arg;
struct intel_wait *waiters;
const int count = 4096;
unsigned long *bitmap;
int err = -ENOMEM;
int n, m;
mock_engine_reset(engine);
waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL);
if (!waiters)
goto out_engines;
bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap),
GFP_KERNEL);
if (!bitmap)
goto out_waiters;
for (n = 0; n < count; n++) {
intel_wait_init_for_seqno(&waiters[n], n + seqno_bias);
intel_engine_add_wait(engine, &waiters[n]);
__set_bit(n, bitmap);
}
err = check_rbtree(engine, bitmap, waiters, count);
if (err)
goto out_bitmap;
/* On each step, we advance the seqno so that several waiters are then
* complete (we increase the seqno by increasingly larger values to
* retire more and more waiters at once). All retired waiters should
* be woken and removed from the rbtree, and so that we check.
*/
for (n = 0; n < count; n = m) {
int seqno = 2 * n;
GEM_BUG_ON(find_first_bit(bitmap, count) != n);
if (intel_wait_complete(&waiters[n])) {
pr_err("waiter[%d, seqno=%d] completed too early\n",
n, waiters[n].seqno);
err = -EINVAL;
goto out_bitmap;
}
/* complete the following waiters */
mock_seqno_advance(engine, seqno + seqno_bias);
for (m = n; m <= seqno; m++) {
if (m == count)
break;
GEM_BUG_ON(!test_bit(m, bitmap));
__clear_bit(m, bitmap);
}
intel_engine_remove_wait(engine, &waiters[n]);
RB_CLEAR_NODE(&waiters[n].node);
err = check_rbtree(engine, bitmap, waiters, count);
if (err) {
pr_err("rbtree corrupt after seqno advance to %d\n",
seqno + seqno_bias);
goto out_bitmap;
}
err = check_completion(engine, bitmap, waiters, count);
if (err) {
pr_err("completions after seqno advance to %d failed\n",
seqno + seqno_bias);
goto out_bitmap;
}
}
err = check_rbtree_empty(engine);
out_bitmap:
kfree(bitmap);
out_waiters:
kvfree(waiters);
out_engines:
mock_engine_flush(engine);
return err;
}
struct igt_wakeup {
struct task_struct *tsk;
atomic_t *ready, *set, *done;
struct intel_engine_cs *engine;
unsigned long flags;
#define STOP 0
#define IDLE 1
wait_queue_head_t *wq;
u32 seqno;
};
static bool wait_for_ready(struct igt_wakeup *w)
{
DEFINE_WAIT(ready);
set_bit(IDLE, &w->flags);
if (atomic_dec_and_test(w->done))
wake_up_var(w->done);
if (test_bit(STOP, &w->flags))
goto out;
for (;;) {
prepare_to_wait(w->wq, &ready, TASK_INTERRUPTIBLE);
if (atomic_read(w->ready) == 0)
break;
schedule();
}
finish_wait(w->wq, &ready);
out:
clear_bit(IDLE, &w->flags);
if (atomic_dec_and_test(w->set))
wake_up_var(w->set);
return !test_bit(STOP, &w->flags);
}
static int igt_wakeup_thread(void *arg)
{
struct igt_wakeup *w = arg;
struct intel_wait wait;
while (wait_for_ready(w)) {
GEM_BUG_ON(kthread_should_stop());
intel_wait_init_for_seqno(&wait, w->seqno);
intel_engine_add_wait(w->engine, &wait);
for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE);
if (i915_seqno_passed(intel_engine_get_seqno(w->engine),
w->seqno))
break;
if (test_bit(STOP, &w->flags)) /* emergency escape */
break;
schedule();
}
intel_engine_remove_wait(w->engine, &wait);
__set_current_state(TASK_RUNNING);
}
return 0;
}
static void igt_wake_all_sync(atomic_t *ready,
atomic_t *set,
atomic_t *done,
wait_queue_head_t *wq,
int count)
{
atomic_set(set, count);
atomic_set(ready, 0);
wake_up_all(wq);
wait_var_event(set, !atomic_read(set));
atomic_set(ready, count);
atomic_set(done, count);
}
static int igt_wakeup(void *arg)
{
I915_RND_STATE(prng);
struct intel_engine_cs *engine = arg;
struct igt_wakeup *waiters;
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
const int count = 4096;
const u32 max_seqno = count / 4;
atomic_t ready, set, done;
int err = -ENOMEM;
int n, step;
mock_engine_reset(engine);
waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL);
if (!waiters)
goto out_engines;
/* Create a large number of threads, each waiting on a random seqno.
* Multiple waiters will be waiting for the same seqno.
*/
atomic_set(&ready, count);
for (n = 0; n < count; n++) {
waiters[n].wq = &wq;
waiters[n].ready = &ready;
waiters[n].set = &set;
waiters[n].done = &done;
waiters[n].engine = engine;
waiters[n].flags = BIT(IDLE);
waiters[n].tsk = kthread_run(igt_wakeup_thread, &waiters[n],
"i915/igt:%d", n);
if (IS_ERR(waiters[n].tsk))
goto out_waiters;
get_task_struct(waiters[n].tsk);
}
for (step = 1; step <= max_seqno; step <<= 1) {
u32 seqno;
/* The waiter threads start paused as we assign them a random
* seqno and reset the engine. Once the engine is reset,
* we signal that the threads may begin their wait upon their
* seqno.
*/
for (n = 0; n < count; n++) {
GEM_BUG_ON(!test_bit(IDLE, &waiters[n].flags));
waiters[n].seqno =
1 + prandom_u32_state(&prng) % max_seqno;
}
mock_seqno_advance(engine, 0);
igt_wake_all_sync(&ready, &set, &done, &wq, count);
/* Simulate the GPU doing chunks of work, with one or more
* seqno appearing to finish at the same time. A random number
* of threads will be waiting upon the update and hopefully be
* woken.
*/
for (seqno = 1; seqno <= max_seqno + step; seqno += step) {
usleep_range(50, 500);
mock_seqno_advance(engine, seqno);
}
GEM_BUG_ON(intel_engine_get_seqno(engine) < 1 + max_seqno);
/* With the seqno now beyond any of the waiting threads, they
* should all be woken, see that they are complete and signal
* that they are ready for the next test. We wait until all
* threads are complete and waiting for us (i.e. not a seqno).
*/
if (!wait_var_event_timeout(&done,
!atomic_read(&done), 10 * HZ)) {
pr_err("Timed out waiting for %d remaining waiters\n",
atomic_read(&done));
err = -ETIMEDOUT;
break;
}
err = check_rbtree_empty(engine);
if (err)
break;
}
out_waiters:
for (n = 0; n < count; n++) {
if (IS_ERR(waiters[n].tsk))
break;
set_bit(STOP, &waiters[n].flags);
}
mock_seqno_advance(engine, INT_MAX); /* wakeup any broken waiters */
igt_wake_all_sync(&ready, &set, &done, &wq, n);
for (n = 0; n < count; n++) {
if (IS_ERR(waiters[n].tsk))
break;
kthread_stop(waiters[n].tsk);
put_task_struct(waiters[n].tsk);
}
kvfree(waiters);
out_engines:
mock_engine_flush(engine);
return err;
}
int intel_breadcrumbs_mock_selftests(void)
{
static const struct i915_subtest tests[] = {
SUBTEST(igt_random_insert_remove),
SUBTEST(igt_insert_complete),
SUBTEST(igt_wakeup),
};
struct drm_i915_private *i915;
int err;
i915 = mock_gem_device();
if (!i915)
return -ENOMEM;
err = i915_subtests(tests, i915->engine[RCS]);
drm_dev_put(&i915->drm);
return err;
}


@ -363,9 +363,7 @@ static int igt_global_reset(void *arg)
/* Check that we can issue a global GPU reset */ /* Check that we can issue a global GPU reset */
igt_global_reset_lock(i915); igt_global_reset_lock(i915);
set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
mutex_lock(&i915->drm.struct_mutex);
reset_count = i915_reset_count(&i915->gpu_error); reset_count = i915_reset_count(&i915->gpu_error);
i915_reset(i915, ALL_ENGINES, NULL); i915_reset(i915, ALL_ENGINES, NULL);
@ -374,9 +372,7 @@ static int igt_global_reset(void *arg)
pr_err("No GPU reset recorded!\n"); pr_err("No GPU reset recorded!\n");
err = -EINVAL; err = -EINVAL;
} }
mutex_unlock(&i915->drm.struct_mutex);
GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
igt_global_reset_unlock(i915); igt_global_reset_unlock(i915);
if (i915_terminally_wedged(&i915->gpu_error)) if (i915_terminally_wedged(&i915->gpu_error))
@ -393,18 +389,16 @@ static int igt_wedged_reset(void *arg)
/* Check that we can recover a wedged device with a GPU reset */ /* Check that we can recover a wedged device with a GPU reset */
igt_global_reset_lock(i915); igt_global_reset_lock(i915);
mutex_lock(&i915->drm.struct_mutex);
wakeref = intel_runtime_pm_get(i915); wakeref = intel_runtime_pm_get(i915);
i915_gem_set_wedged(i915); i915_gem_set_wedged(i915);
GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error));
set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags); mutex_lock(&i915->drm.struct_mutex);
GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error));
i915_reset(i915, ALL_ENGINES, NULL); i915_reset(i915, ALL_ENGINES, NULL);
GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags)); mutex_unlock(&i915->drm.struct_mutex);
intel_runtime_pm_put(i915, wakeref); intel_runtime_pm_put(i915, wakeref);
mutex_unlock(&i915->drm.struct_mutex);
igt_global_reset_unlock(i915); igt_global_reset_unlock(i915);
return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0; return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0;
@ -455,8 +449,6 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
do { do {
u32 seqno = intel_engine_get_seqno(engine);
if (active) { if (active) {
struct i915_request *rq; struct i915_request *rq;
@ -485,8 +477,6 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
break; break;
} }
GEM_BUG_ON(!rq->global_seqno);
seqno = rq->global_seqno - 1;
i915_request_put(rq); i915_request_put(rq);
} }
@ -502,16 +492,15 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
break; break;
} }
reset_engine_count += active;
if (i915_reset_engine_count(&i915->gpu_error, engine) != if (i915_reset_engine_count(&i915->gpu_error, engine) !=
reset_engine_count) { ++reset_engine_count) {
pr_err("%s engine reset %srecorded!\n", pr_err("%s engine reset not recorded!\n",
engine->name, active ? "not " : ""); engine->name);
err = -EINVAL; err = -EINVAL;
break; break;
} }
if (!wait_for_idle(engine)) { if (!i915_reset_flush(i915)) {
struct drm_printer p = struct drm_printer p =
drm_info_printer(i915->drm.dev); drm_info_printer(i915->drm.dev);
@ -734,7 +723,6 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
do { do {
u32 seqno = intel_engine_get_seqno(engine);
struct i915_request *rq = NULL; struct i915_request *rq = NULL;
if (flags & TEST_ACTIVE) { if (flags & TEST_ACTIVE) {
@ -762,9 +750,6 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
err = -EIO; err = -EIO;
break; break;
} }
GEM_BUG_ON(!rq->global_seqno);
seqno = rq->global_seqno - 1;
} }
err = i915_reset_engine(engine, NULL); err = i915_reset_engine(engine, NULL);
@ -801,10 +786,9 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
reported = i915_reset_engine_count(&i915->gpu_error, engine); reported = i915_reset_engine_count(&i915->gpu_error, engine);
reported -= threads[engine->id].resets; reported -= threads[engine->id].resets;
if (reported != (flags & TEST_ACTIVE ? count : 0)) { if (reported != count) {
pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu, expected %lu reported\n", pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n",
engine->name, test_name, count, reported, engine->name, test_name, count, reported);
(flags & TEST_ACTIVE ? count : 0));
if (!err) if (!err)
err = -EINVAL; err = -EINVAL;
} }
@ -903,20 +887,13 @@ static int igt_reset_engines(void *arg)
return 0; return 0;
} }
static u32 fake_hangcheck(struct i915_request *rq, u32 mask) static u32 fake_hangcheck(struct drm_i915_private *i915, u32 mask)
{ {
struct i915_gpu_error *error = &rq->i915->gpu_error; u32 count = i915_reset_count(&i915->gpu_error);
u32 reset_count = i915_reset_count(error);
error->stalled_mask = mask; i915_reset(i915, mask, NULL);
/* set_bit() must be after we have setup the backchannel (mask) */ return count;
smp_mb__before_atomic();
set_bit(I915_RESET_HANDOFF, &error->flags);
wake_up_all(&error->wait_queue);
return reset_count;
} }
static int igt_reset_wait(void *arg) static int igt_reset_wait(void *arg)
@ -962,7 +939,7 @@ static int igt_reset_wait(void *arg)
goto out_rq; goto out_rq;
} }
reset_count = fake_hangcheck(rq, ALL_ENGINES); reset_count = fake_hangcheck(i915, ALL_ENGINES);
timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10); timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10);
if (timeout < 0) { if (timeout < 0) {
@ -972,7 +949,6 @@ static int igt_reset_wait(void *arg)
goto out_rq; goto out_rq;
} }
GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
if (i915_reset_count(&i915->gpu_error) == reset_count) { if (i915_reset_count(&i915->gpu_error) == reset_count) {
pr_err("No GPU reset recorded!\n"); pr_err("No GPU reset recorded!\n");
err = -EINVAL; err = -EINVAL;
@ -1151,7 +1127,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,
wait_for_completion(&arg.completion); wait_for_completion(&arg.completion);
if (wait_for(waitqueue_active(&rq->execute), 10)) { if (wait_for(!list_empty(&rq->fence.cb_list), 10)) {
struct drm_printer p = drm_info_printer(i915->drm.dev); struct drm_printer p = drm_info_printer(i915->drm.dev);
pr_err("igt/evict_vma kthread did not wait\n"); pr_err("igt/evict_vma kthread did not wait\n");
@ -1162,7 +1138,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,
} }
out_reset: out_reset:
fake_hangcheck(rq, intel_engine_flag(rq->engine)); fake_hangcheck(rq->i915, intel_engine_flag(rq->engine));
if (tsk) { if (tsk) {
struct igt_wedge_me w; struct igt_wedge_me w;
@ -1341,12 +1317,7 @@ static int igt_reset_queue(void *arg)
goto fini; goto fini;
} }
reset_count = fake_hangcheck(prev, ENGINE_MASK(id)); reset_count = fake_hangcheck(i915, ENGINE_MASK(id));
i915_reset(i915, ENGINE_MASK(id), NULL);
GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
&i915->gpu_error.flags));
if (prev->fence.error != -EIO) { if (prev->fence.error != -EIO) {
pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n", pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
@ -1565,6 +1536,7 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine,
pr_err("%s(%s): Failed to start request %llx, at %x\n", pr_err("%s(%s): Failed to start request %llx, at %x\n",
__func__, engine->name, __func__, engine->name,
rq->fence.seqno, hws_seqno(&h, rq)); rq->fence.seqno, hws_seqno(&h, rq));
i915_gem_set_wedged(i915);
err = -EIO; err = -EIO;
} }
@ -1588,7 +1560,6 @@ out:
static void force_reset(struct drm_i915_private *i915) static void force_reset(struct drm_i915_private *i915)
{ {
i915_gem_set_wedged(i915); i915_gem_set_wedged(i915);
set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
i915_reset(i915, 0, NULL); i915_reset(i915, 0, NULL);
} }
@ -1618,6 +1589,26 @@ static int igt_atomic_reset(void *arg)
if (i915_terminally_wedged(&i915->gpu_error)) if (i915_terminally_wedged(&i915->gpu_error))
goto unlock; goto unlock;
if (intel_has_gpu_reset(i915)) {
const typeof(*phases) *p;
for (p = phases; p->name; p++) {
GEM_TRACE("intel_gpu_reset under %s\n", p->name);
p->critical_section_begin();
err = intel_gpu_reset(i915, ALL_ENGINES);
p->critical_section_end();
if (err) {
pr_err("intel_gpu_reset failed under %s\n",
p->name);
goto out;
}
}
force_reset(i915);
}
if (intel_has_reset_engine(i915)) { if (intel_has_reset_engine(i915)) {
struct intel_engine_cs *engine; struct intel_engine_cs *engine;
enum intel_engine_id id; enum intel_engine_id id;
@ -1674,6 +1665,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
wakeref = intel_runtime_pm_get(i915); wakeref = intel_runtime_pm_get(i915);
saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck); saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck);
drain_delayed_work(&i915->gpu_error.hangcheck_work); /* flush param */
err = i915_subtests(tests, i915); err = i915_subtests(tests, i915);


@ -268,6 +268,143 @@ err_wedged:
goto err_ctx_lo;
}
struct preempt_client {
struct igt_spinner spin;
struct i915_gem_context *ctx;
};
static int preempt_client_init(struct drm_i915_private *i915,
struct preempt_client *c)
{
c->ctx = kernel_context(i915);
if (!c->ctx)
return -ENOMEM;
if (igt_spinner_init(&c->spin, i915))
goto err_ctx;
return 0;
err_ctx:
kernel_context_close(c->ctx);
return -ENOMEM;
}
static void preempt_client_fini(struct preempt_client *c)
{
igt_spinner_fini(&c->spin);
kernel_context_close(c->ctx);
}
static int live_suppress_self_preempt(void *arg)
{
struct drm_i915_private *i915 = arg;
struct intel_engine_cs *engine;
struct i915_sched_attr attr = {
.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
};
struct preempt_client a, b;
enum intel_engine_id id;
intel_wakeref_t wakeref;
int err = -ENOMEM;
/*
* Verify that if a preemption request does not cause a change in
* the current execution order, the preempt-to-idle injection is
* skipped and that we do not accidentally apply it after the CS
* completion event.
*/
if (!HAS_LOGICAL_RING_PREEMPTION(i915))
return 0;
if (USES_GUC_SUBMISSION(i915))
return 0; /* presume black box */
mutex_lock(&i915->drm.struct_mutex);
wakeref = intel_runtime_pm_get(i915);
if (preempt_client_init(i915, &a))
goto err_unlock;
if (preempt_client_init(i915, &b))
goto err_client_a;
for_each_engine(engine, i915, id) {
struct i915_request *rq_a, *rq_b;
int depth;
engine->execlists.preempt_hang.count = 0;
rq_a = igt_spinner_create_request(&a.spin,
a.ctx, engine,
MI_NOOP);
if (IS_ERR(rq_a)) {
err = PTR_ERR(rq_a);
goto err_client_b;
}
i915_request_add(rq_a);
if (!igt_wait_for_spinner(&a.spin, rq_a)) {
pr_err("First client failed to start\n");
goto err_wedged;
}
for (depth = 0; depth < 8; depth++) {
rq_b = igt_spinner_create_request(&b.spin,
b.ctx, engine,
MI_NOOP);
if (IS_ERR(rq_b)) {
err = PTR_ERR(rq_b);
goto err_client_b;
}
i915_request_add(rq_b);
GEM_BUG_ON(i915_request_completed(rq_a));
engine->schedule(rq_a, &attr);
igt_spinner_end(&a.spin);
if (!igt_wait_for_spinner(&b.spin, rq_b)) {
pr_err("Second client failed to start\n");
goto err_wedged;
}
swap(a, b);
rq_a = rq_b;
}
igt_spinner_end(&a.spin);
if (engine->execlists.preempt_hang.count) {
pr_err("Preemption recorded x%d, depth %d; should have been suppressed!\n",
engine->execlists.preempt_hang.count,
depth);
err = -EINVAL;
goto err_client_b;
}
if (igt_flush_test(i915, I915_WAIT_LOCKED))
goto err_wedged;
}
err = 0;
err_client_b:
preempt_client_fini(&b);
err_client_a:
preempt_client_fini(&a);
err_unlock:
if (igt_flush_test(i915, I915_WAIT_LOCKED))
err = -EIO;
intel_runtime_pm_put(i915, wakeref);
mutex_unlock(&i915->drm.struct_mutex);
return err;
err_wedged:
igt_spinner_end(&b.spin);
igt_spinner_end(&a.spin);
i915_gem_set_wedged(i915);
err = -EIO;
goto err_client_b;
}
static int live_preempt_hang(void *arg)
{
struct drm_i915_private *i915 = arg;
@ -647,6 +784,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_sanitycheck),
SUBTEST(live_preempt),
SUBTEST(live_late_preempt),
SUBTEST(live_suppress_self_preempt),
SUBTEST(live_preempt_hang),
SUBTEST(live_preempt_smoke),
};


@ -214,7 +214,6 @@ out_put:
static int do_device_reset(struct intel_engine_cs *engine)
{
set_bit(I915_RESET_HANDOFF, &engine->i915->gpu_error.flags);
i915_reset(engine->i915, ENGINE_MASK(engine->id), "live_workarounds");
return 0;
}
@ -394,7 +393,6 @@ static int
live_gpu_reset_gt_engine_workarounds(void *arg)
{
struct drm_i915_private *i915 = arg;
struct i915_gpu_error *error = &i915->gpu_error;
intel_wakeref_t wakeref;
struct wa_lists lists;
bool ok;
@ -413,7 +411,6 @@ live_gpu_reset_gt_engine_workarounds(void *arg)
if (!ok)
goto out;
set_bit(I915_RESET_HANDOFF, &error->flags);
i915_reset(i915, ALL_ENGINES, "live_workarounds");
ok = verify_gt_engine_wa(i915, &lists, "after reset");


@ -76,3 +76,57 @@ void timed_fence_fini(struct timed_fence *tf)
destroy_timer_on_stack(&tf->timer);
i915_sw_fence_fini(&tf->fence);
}
struct heap_fence {
struct i915_sw_fence fence;
union {
struct kref ref;
struct rcu_head rcu;
};
};
static int __i915_sw_fence_call
heap_fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
struct heap_fence *h = container_of(fence, typeof(*h), fence);
switch (state) {
case FENCE_COMPLETE:
break;
case FENCE_FREE:
heap_fence_put(&h->fence);
}
return NOTIFY_DONE;
}
struct i915_sw_fence *heap_fence_create(gfp_t gfp)
{
struct heap_fence *h;
h = kmalloc(sizeof(*h), gfp);
if (!h)
return NULL;
i915_sw_fence_init(&h->fence, heap_fence_notify);
refcount_set(&h->ref.refcount, 2);
return &h->fence;
}
static void heap_fence_release(struct kref *ref)
{
struct heap_fence *h = container_of(ref, typeof(*h), ref);
i915_sw_fence_fini(&h->fence);
kfree_rcu(h, rcu);
}
void heap_fence_put(struct i915_sw_fence *fence)
{
struct heap_fence *h = container_of(fence, typeof(*h), fence);
kref_put(&h->ref, heap_fence_release);
}
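For reference, the intended call pattern, mirroring how the breadcrumbs smoketest above drives it (error handling trimmed to a sketch):

	struct i915_sw_fence *submit;

	submit = heap_fence_create(GFP_KERNEL); /* returns with two references: the caller's and FENCE_FREE's */
	if (!submit)
		return -ENOMEM;

	/* gate request submission on the fence, e.g. i915_sw_fence_await_sw_fence_gfp(&rq->submit, submit, GFP_KERNEL) */

	i915_sw_fence_commit(submit);	/* allow the fence to signal */
	heap_fence_put(submit);		/* drop the caller's reference; the other is released via FENCE_FREE */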


@ -39,4 +39,7 @@ struct timed_fence {
void timed_fence_init(struct timed_fence *tf, unsigned long expires);
void timed_fence_fini(struct timed_fence *tf);
struct i915_sw_fence *heap_fence_create(gfp_t gfp);
void heap_fence_put(struct i915_sw_fence *fence);
#endif /* _LIB_SW_FENCE_H_ */


@ -30,6 +30,17 @@ struct mock_ring {
struct i915_timeline timeline; struct i915_timeline timeline;
}; };
static void mock_timeline_pin(struct i915_timeline *tl)
{
tl->pin_count++;
}
static void mock_timeline_unpin(struct i915_timeline *tl)
{
GEM_BUG_ON(!tl->pin_count);
tl->pin_count--;
}
static struct intel_ring *mock_ring(struct intel_engine_cs *engine) static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
{ {
const unsigned long sz = PAGE_SIZE / 2; const unsigned long sz = PAGE_SIZE / 2;
@ -39,7 +50,12 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
if (!ring) if (!ring)
return NULL; return NULL;
i915_timeline_init(engine->i915, &ring->timeline, engine->name); if (i915_timeline_init(engine->i915,
&ring->timeline, engine->name,
NULL)) {
kfree(ring);
return NULL;
}
ring->base.size = sz; ring->base.size = sz;
ring->base.effective_size = sz; ring->base.effective_size = sz;
@ -70,15 +86,21 @@ static struct mock_request *first_request(struct mock_engine *engine)
static void advance(struct mock_request *request) static void advance(struct mock_request *request)
{ {
list_del_init(&request->link); list_del_init(&request->link);
mock_seqno_advance(request->base.engine, request->base.global_seqno); intel_engine_write_global_seqno(request->base.engine,
request->base.global_seqno);
i915_request_mark_complete(&request->base);
GEM_BUG_ON(!i915_request_completed(&request->base));
intel_engine_queue_breadcrumbs(request->base.engine);
} }
static void hw_delay_complete(struct timer_list *t) static void hw_delay_complete(struct timer_list *t)
{ {
struct mock_engine *engine = from_timer(engine, t, hw_delay); struct mock_engine *engine = from_timer(engine, t, hw_delay);
struct mock_request *request; struct mock_request *request;
unsigned long flags;
spin_lock(&engine->hw_lock); spin_lock_irqsave(&engine->hw_lock, flags);
/* Timer fired, first request is complete */ /* Timer fired, first request is complete */
request = first_request(engine); request = first_request(engine);
@ -98,11 +120,12 @@ static void hw_delay_complete(struct timer_list *t)
advance(request); advance(request);
} }
spin_unlock(&engine->hw_lock); spin_unlock_irqrestore(&engine->hw_lock, flags);
} }
static void mock_context_unpin(struct intel_context *ce) static void mock_context_unpin(struct intel_context *ce)
{ {
mock_timeline_unpin(ce->ring->timeline);
i915_gem_context_put(ce->gem_context); i915_gem_context_put(ce->gem_context);
} }
@ -124,6 +147,7 @@ mock_context_pin(struct intel_engine_cs *engine,
struct i915_gem_context *ctx) struct i915_gem_context *ctx)
{ {
struct intel_context *ce = to_intel_context(ctx, engine); struct intel_context *ce = to_intel_context(ctx, engine);
int err = -ENOMEM;
if (ce->pin_count++) if (ce->pin_count++)
return ce; return ce;
@ -134,13 +158,15 @@ mock_context_pin(struct intel_engine_cs *engine,
goto err; goto err;
} }
mock_timeline_pin(ce->ring->timeline);
ce->ops = &mock_context_ops; ce->ops = &mock_context_ops;
i915_gem_context_get(ctx); i915_gem_context_get(ctx);
return ce; return ce;
err: err:
ce->pin_count = 0; ce->pin_count = 0;
return ERR_PTR(-ENOMEM); return ERR_PTR(err);
} }
static int mock_request_alloc(struct i915_request *request) static int mock_request_alloc(struct i915_request *request)
@ -159,9 +185,9 @@ static int mock_emit_flush(struct i915_request *request,
return 0; return 0;
} }
static void mock_emit_breadcrumb(struct i915_request *request, static u32 *mock_emit_breadcrumb(struct i915_request *request, u32 *cs)
u32 *flags)
{ {
return cs;
} }
static void mock_submit_request(struct i915_request *request) static void mock_submit_request(struct i915_request *request)
@ -169,11 +195,12 @@ static void mock_submit_request(struct i915_request *request)
struct mock_request *mock = container_of(request, typeof(*mock), base); struct mock_request *mock = container_of(request, typeof(*mock), base);
struct mock_engine *engine = struct mock_engine *engine =
container_of(request->engine, typeof(*engine), base); container_of(request->engine, typeof(*engine), base);
unsigned long flags;
i915_request_submit(request); i915_request_submit(request);
GEM_BUG_ON(!request->global_seqno); GEM_BUG_ON(!request->global_seqno);
spin_lock_irq(&engine->hw_lock); spin_lock_irqsave(&engine->hw_lock, flags);
list_add_tail(&mock->link, &engine->hw_queue); list_add_tail(&mock->link, &engine->hw_queue);
if (mock->link.prev == &engine->hw_queue) { if (mock->link.prev == &engine->hw_queue) {
if (mock->delay) if (mock->delay)
@ -181,7 +208,7 @@ static void mock_submit_request(struct i915_request *request)
else else
advance(mock); advance(mock);
} }
spin_unlock_irq(&engine->hw_lock); spin_unlock_irqrestore(&engine->hw_lock, flags);
} }
struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
@ -200,15 +227,19 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
engine->base.i915 = i915; engine->base.i915 = i915;
snprintf(engine->base.name, sizeof(engine->base.name), "%s", name); snprintf(engine->base.name, sizeof(engine->base.name), "%s", name);
engine->base.id = id; engine->base.id = id;
engine->base.status_page.page_addr = (void *)(engine + 1); engine->base.status_page.addr = (void *)(engine + 1);
engine->base.context_pin = mock_context_pin; engine->base.context_pin = mock_context_pin;
engine->base.request_alloc = mock_request_alloc; engine->base.request_alloc = mock_request_alloc;
engine->base.emit_flush = mock_emit_flush; engine->base.emit_flush = mock_emit_flush;
engine->base.emit_breadcrumb = mock_emit_breadcrumb; engine->base.emit_fini_breadcrumb = mock_emit_breadcrumb;
engine->base.submit_request = mock_submit_request; engine->base.submit_request = mock_submit_request;
i915_timeline_init(i915, &engine->base.timeline, engine->base.name); if (i915_timeline_init(i915,
&engine->base.timeline,
engine->base.name,
NULL))
goto err_free;
i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE); i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE);
intel_engine_init_breadcrumbs(&engine->base); intel_engine_init_breadcrumbs(&engine->base);
@ -226,6 +257,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
err_breadcrumbs: err_breadcrumbs:
intel_engine_fini_breadcrumbs(&engine->base); intel_engine_fini_breadcrumbs(&engine->base);
i915_timeline_fini(&engine->base.timeline); i915_timeline_fini(&engine->base.timeline);
err_free:
kfree(engine); kfree(engine);
return NULL; return NULL;
} }
@ -246,7 +278,7 @@ void mock_engine_flush(struct intel_engine_cs *engine)
void mock_engine_reset(struct intel_engine_cs *engine) void mock_engine_reset(struct intel_engine_cs *engine)
{ {
intel_write_status_page(engine, I915_GEM_HWS_INDEX, 0); intel_engine_write_global_seqno(engine, 0);
} }
void mock_engine_free(struct intel_engine_cs *engine) void mock_engine_free(struct intel_engine_cs *engine)


@@ -46,10 +46,4 @@ void mock_engine_flush(struct intel_engine_cs *engine);
 void mock_engine_reset(struct intel_engine_cs *engine);
 void mock_engine_free(struct intel_engine_cs *engine);
-static inline void mock_seqno_advance(struct intel_engine_cs *engine, u32 seqno)
-{
-	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
-	intel_engine_wakeup(engine);
-}
 #endif /* !__MOCK_ENGINE_H__ */

@@ -58,8 +58,8 @@ static void mock_device_release(struct drm_device *dev)
 	i915_gem_contexts_lost(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
-	cancel_delayed_work_sync(&i915->gt.retire_work);
-	cancel_delayed_work_sync(&i915->gt.idle_work);
+	drain_delayed_work(&i915->gt.retire_work);
+	drain_delayed_work(&i915->gt.idle_work);
 	i915_gem_drain_workqueue(i915);
 	mutex_lock(&i915->drm.struct_mutex);
@@ -68,13 +68,14 @@ static void mock_device_release(struct drm_device *dev)
 	i915_gem_contexts_fini(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
+	i915_timelines_fini(i915);
 	drain_workqueue(i915->wq);
 	i915_gem_drain_freed_objects(i915);
 	mutex_lock(&i915->drm.struct_mutex);
 	mock_fini_ggtt(&i915->ggtt);
 	mutex_unlock(&i915->drm.struct_mutex);
-	WARN_ON(!list_empty(&i915->gt.timelines));
 	destroy_workqueue(i915->wq);
@@ -226,7 +227,8 @@ struct drm_i915_private *mock_gem_device(void)
 	if (!i915->priorities)
 		goto err_dependencies;
-	INIT_LIST_HEAD(&i915->gt.timelines);
+	i915_timelines_init(i915);
 	INIT_LIST_HEAD(&i915->gt.active_rings);
 	INIT_LIST_HEAD(&i915->gt.closed_vma);
@@ -253,6 +255,7 @@ err_context:
 	i915_gem_contexts_fini(i915);
 err_unlock:
 	mutex_unlock(&i915->drm.struct_mutex);
+	i915_timelines_fini(i915);
 	kmem_cache_destroy(i915->priorities);
 err_dependencies:
 	kmem_cache_destroy(i915->dependencies);
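The mock-device hunks above replace the open-coded timeline list setup with i915_timelines_init() and add the matching i915_timelines_fini() to both the release path and the error-unwind labels. A stripped-down sketch of that paired init/fini, goto-unwind shape; every name below is a placeholder rather than the driver's API:

struct example_device;

int example_timelines_init(struct example_device *dev);
void example_timelines_fini(struct example_device *dev);
int example_contexts_init(struct example_device *dev);
void example_contexts_fini(struct example_device *dev);

/* Build up in order; on failure, unwind in reverse via labelled cleanup. */
int example_device_init(struct example_device *dev)
{
	int err;

	err = example_timelines_init(dev);
	if (err)
		return err;

	err = example_contexts_init(dev);
	if (err)
		goto err_timelines;

	return 0;

err_timelines:
	example_timelines_fini(dev);
	return err;
}

/* Teardown mirrors init, releasing the timelines last. */
void example_device_release(struct example_device *dev)
{
	example_contexts_fini(dev);
	example_timelines_fini(dev);
}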

@@ -10,6 +10,7 @@
 void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 {
+	timeline->i915 = NULL;
 	timeline->fence_context = context;
 	spin_lock_init(&timeline->lock);
@@ -24,5 +25,5 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 void mock_timeline_fini(struct i915_timeline *timeline)
 {
-	i915_timeline_fini(timeline);
+	i915_syncmap_free(&timeline->sync);
 }

@@ -96,6 +96,5 @@ enum drm_color_lut_tests {
 	DRM_COLOR_LUT_NON_DECREASING = BIT(1),
 };
-int drm_color_lut_check(struct drm_property_blob *lut,
-			uint32_t tests);
+int drm_color_lut_check(const struct drm_property_blob *lut, u32 tests);
 #endif
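With the prototype constified as above, a caller can validate a LUT blob it only holds a read-only reference to. A hedged sketch of such a call site; apart from drm_color_lut_check(), DRM_COLOR_LUT_NON_DECREASING and the gamma_lut field, the names are illustrative:

#include <drm/drm_color_mgmt.h>
#include <drm/drm_crtc.h>

/*
 * Reject a gamma LUT whose entries ever decrease. The surrounding
 * helper is illustrative, not an existing DRM function.
 */
static int example_check_gamma_lut(const struct drm_crtc_state *state)
{
	return drm_color_lut_check(state->gamma_lut,
				   DRM_COLOR_LUT_NON_DECREASING);
}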

@@ -394,6 +394,9 @@
 	INTEL_VGA_DEVICE(0x3E9A, info) /* SRV GT2 */
 /* CFL H */
+#define INTEL_CFL_H_GT1_IDS(info) \
+	INTEL_VGA_DEVICE(0x3E9C, info)
 #define INTEL_CFL_H_GT2_IDS(info) \
 	INTEL_VGA_DEVICE(0x3E9B, info), /* Halo GT2 */ \
 	INTEL_VGA_DEVICE(0x3E94, info)  /* Halo GT2 */
@@ -426,6 +429,7 @@
 #define INTEL_CFL_IDS(info) \
 	INTEL_CFL_S_GT1_IDS(info), \
 	INTEL_CFL_S_GT2_IDS(info), \
+	INTEL_CFL_H_GT1_IDS(info), \
 	INTEL_CFL_H_GT2_IDS(info), \
 	INTEL_CFL_U_GT2_IDS(info), \
 	INTEL_CFL_U_GT3_IDS(info), \
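Because INTEL_CFL_H_GT1_IDS() is folded into INTEL_CFL_IDS(), any PCI ID table built from these macros now matches device 0x3E9C as well. A rough sketch of such a table; the driver-data structure and table name are placeholders, not code from this series:

#include <linux/pci.h>
#include <drm/i915_pciids.h>

/* Placeholder driver data; a real driver passes its device-info structure. */
static const struct { unsigned int placeholder; } example_cfl_info = { 0 };

static const struct pci_device_id example_ids[] = {
	INTEL_CFL_H_GT1_IDS(&example_cfl_info),	/* new: 0x3E9C */
	INTEL_CFL_H_GT2_IDS(&example_cfl_info),	/* 0x3E9B, 0x3E94 */
	{ }
};

Each INTEL_VGA_DEVICE() entry expands to a struct pci_device_id initializer that carries the given pointer as driver data, so adding the GT1 macro to INTEL_CFL_IDS() is all it takes for consumers of that list to pick up the new ID.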