diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 7ccfdbc8f9b4..ac75e0c5735c 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1712,45 +1712,6 @@ static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt, wmb(); } -static inline u32 get_pd_offset(struct i915_hw_ppgtt *ppgtt) -{ - GEM_BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); - return ppgtt->pd.base.ggtt_offset << 10; -} - -static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct i915_request *rq) -{ - struct intel_engine_cs *engine = rq->engine; - u32 *cs; - - /* NB: TLBs must be flushed and invalidated before a switch */ - cs = intel_ring_begin(rq, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(2); - *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine)); - *cs++ = PP_DIR_DCLV_2G; - *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); - *cs++ = get_pd_offset(ppgtt); - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - return 0; -} - -static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct i915_request *rq) -{ - struct intel_engine_cs *engine = rq->engine; - struct drm_i915_private *dev_priv = rq->i915; - - I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); - I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt)); - return 0; -} - static void gen8_ppgtt_enable(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; @@ -2024,12 +1985,6 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) ppgtt->vm.dma = &i915->drm.pdev->dev; ppgtt->vm.pte_encode = ggtt->vm.pte_encode; - if (IS_GEN6(i915)) - ppgtt->switch_mm = gen6_mm_switch; - else if (IS_GEN7(i915)) - ppgtt->switch_mm = gen7_mm_switch; - else - BUG(); err = gen6_ppgtt_alloc(ppgtt); if (err) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 16307ba7e303..e70f6abcd0f2 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -406,8 +406,6 @@ struct i915_hw_ppgtt { gen6_pte_t __iomem *pd_addr; - int (*switch_mm)(struct i915_hw_ppgtt *ppgtt, - struct i915_request *rq); void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m); }; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index f187250e60c6..9092f5464c24 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -817,6 +817,8 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) /* Keep a second pin for the dual retirement along engine and ring */ __intel_context_pin(ce); + rq->infix = rq->ring->emit; /* end of header; start of user payload */ + /* Check that we didn't interrupt ourselves with a new request */ GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno); return rq; diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 491ff81d0fea..0e9aba53d0e4 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -134,6 +134,9 @@ struct i915_request { /** Position in the ring of the start of the request */ u32 head; + /** Position in the ring of the start of the user packets */ + u32 infix; + /** * Position in the ring of the start of the postfix. * This is required to calculate the maximum available ring space diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 1472f48ab2e8..b50c6b829715 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -973,39 +973,6 @@ DEFINE_EVENT(i915_context, i915_context_free, TP_ARGS(ctx) ); -/** - * DOC: switch_mm tracepoint - * - * This tracepoint allows tracking of the mm switch, which is an important point - * in the lifetime of the vm in the legacy submission path. This tracepoint is - * called only if full ppgtt is enabled. - */ -TRACE_EVENT(switch_mm, - TP_PROTO(struct intel_engine_cs *engine, struct i915_gem_context *to), - - TP_ARGS(engine, to), - - TP_STRUCT__entry( - __field(u16, class) - __field(u16, instance) - __field(struct i915_gem_context *, to) - __field(struct i915_address_space *, vm) - __field(u32, dev) - ), - - TP_fast_assign( - __entry->class = engine->uabi_class; - __entry->instance = engine->instance; - __entry->to = to; - __entry->vm = to->ppgtt ? &to->ppgtt->vm : NULL; - __entry->dev = engine->i915->drm.primary->index; - ), - - TP_printk("dev=%u, engine=%u:%u, ctx=%p, ctx_vm=%p", - __entry->dev, __entry->class, __entry->instance, __entry->to, - __entry->vm) -); - #endif /* _I915_TRACE_H_ */ /* This part must be outside protection */ diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 2ec2e60dc670..d1cf8b4926ab 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1168,9 +1168,6 @@ void intel_engine_lost_context(struct intel_engine_cs *engine) lockdep_assert_held(&engine->i915->drm.struct_mutex); - engine->legacy_active_context = NULL; - engine->legacy_active_ppgtt = NULL; - ce = fetch_and_zero(&engine->last_retired_context); if (ce) intel_context_unpin(ce); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 5bc53a5f4504..d72a6a5ff3ac 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -541,11 +541,23 @@ static struct i915_request *reset_prepare(struct intel_engine_cs *engine) return i915_gem_find_active_request(engine); } -static void reset_ring(struct intel_engine_cs *engine, - struct i915_request *request) +static void skip_request(struct i915_request *rq) { - GEM_TRACE("%s seqno=%x\n", - engine->name, request ? request->global_seqno : 0); + void *vaddr = rq->ring->vaddr; + u32 head; + + head = rq->infix; + if (rq->postfix < head) { + memset32(vaddr + head, MI_NOOP, + (rq->ring->size - head) / sizeof(u32)); + head = 0; + } + memset32(vaddr + head, MI_NOOP, (rq->postfix - head) / sizeof(u32)); +} + +static void reset_ring(struct intel_engine_cs *engine, struct i915_request *rq) +{ + GEM_TRACE("%s seqno=%x\n", engine->name, rq ? rq->global_seqno : 0); /* * RC6 must be prevented until the reset is complete and the engine @@ -569,43 +581,11 @@ static void reset_ring(struct intel_engine_cs *engine, * If the request was innocent, we try to replay the request with * the restored context. */ - if (request) { - struct drm_i915_private *dev_priv = request->i915; - struct intel_context *ce = request->hw_context; - struct i915_hw_ppgtt *ppgtt; - - if (ce->state) { - I915_WRITE(CCID, - i915_ggtt_offset(ce->state) | - BIT(8) /* must be set! */ | - CCID_EXTENDED_STATE_SAVE | - CCID_EXTENDED_STATE_RESTORE | - CCID_EN); - } - - ppgtt = request->gem_context->ppgtt ?: engine->i915->mm.aliasing_ppgtt; - if (ppgtt) { - u32 pd_offset = ppgtt->pd.base.ggtt_offset << 10; - - I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); - I915_WRITE(RING_PP_DIR_BASE(engine), pd_offset); - - /* Wait for the PD reload to complete */ - if (intel_wait_for_register(dev_priv, - RING_PP_DIR_BASE(engine), - BIT(0), 0, - 10)) - DRM_ERROR("Wait for reload of ppgtt page-directory timed out\n"); - - ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine); - } - + if (rq) { /* If the rq hung, jump to its breadcrumb and skip the batch */ - if (request->fence.error == -EIO) - request->ring->head = request->postfix; - } else { - engine->legacy_active_context = NULL; - engine->legacy_active_ppgtt = NULL; + rq->ring->head = intel_ring_wrap(rq->ring, rq->head); + if (rq->fence.error == -EIO) + skip_request(rq); } } @@ -1446,6 +1426,29 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv) intel_ring_reset(engine->buffer, 0); } +static int load_pd_dir(struct i915_request *rq, + const struct i915_hw_ppgtt *ppgtt) +{ + const struct intel_engine_cs * const engine = rq->engine; + u32 *cs; + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine)); + *cs++ = PP_DIR_DCLV_2G; + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); + *cs++ = ppgtt->pd.base.ggtt_offset << 10; + + intel_ring_advance(rq, cs); + + return 0; +} + static inline int mi_set_context(struct i915_request *rq, u32 flags) { struct drm_i915_private *i915 = rq->i915; @@ -1590,31 +1593,28 @@ static int remap_l3(struct i915_request *rq, int slice) static int switch_context(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; - struct i915_gem_context *to_ctx = rq->gem_context; - struct i915_hw_ppgtt *to_mm = - to_ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; - struct i915_gem_context *from_ctx = engine->legacy_active_context; - struct i915_hw_ppgtt *from_mm = engine->legacy_active_ppgtt; + struct i915_gem_context *ctx = rq->gem_context; + struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; + unsigned int unwind_mm = 0; u32 hw_flags = 0; int ret, i; lockdep_assert_held(&rq->i915->drm.struct_mutex); GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); - if (to_mm != from_mm || - (to_mm && intel_engine_flag(engine) & to_mm->pd_dirty_rings)) { - trace_switch_mm(engine, to_ctx); - ret = to_mm->switch_mm(to_mm, rq); + if (ppgtt) { + ret = load_pd_dir(rq, ppgtt); if (ret) goto err; - to_mm->pd_dirty_rings &= ~intel_engine_flag(engine); - engine->legacy_active_ppgtt = to_mm; - hw_flags = MI_FORCE_RESTORE; + if (intel_engine_flag(engine) & ppgtt->pd_dirty_rings) { + unwind_mm = intel_engine_flag(engine); + ppgtt->pd_dirty_rings &= ~unwind_mm; + hw_flags = MI_FORCE_RESTORE; + } } - if (rq->hw_context->state && - (to_ctx != from_ctx || hw_flags & MI_FORCE_RESTORE)) { + if (rq->hw_context->state) { GEM_BUG_ON(engine->id != RCS); /* @@ -1624,35 +1624,32 @@ static int switch_context(struct i915_request *rq) * as nothing actually executes using the kernel context; it * is purely used for flushing user contexts. */ - if (i915_gem_context_is_kernel(to_ctx)) + if (i915_gem_context_is_kernel(ctx)) hw_flags = MI_RESTORE_INHIBIT; ret = mi_set_context(rq, hw_flags); if (ret) goto err_mm; - - engine->legacy_active_context = to_ctx; } - if (to_ctx->remap_slice) { + if (ctx->remap_slice) { for (i = 0; i < MAX_L3_SLICES; i++) { - if (!(to_ctx->remap_slice & BIT(i))) + if (!(ctx->remap_slice & BIT(i))) continue; ret = remap_l3(rq, i); if (ret) - goto err_ctx; + goto err_mm; } - to_ctx->remap_slice = 0; + ctx->remap_slice = 0; } return 0; -err_ctx: - engine->legacy_active_context = from_ctx; err_mm: - engine->legacy_active_ppgtt = from_mm; + if (unwind_mm) + ppgtt->pd_dirty_rings |= unwind_mm; err: return ret; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index acef385c4c80..b44c67849749 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -557,15 +557,6 @@ struct intel_engine_cs { */ struct intel_context *last_retired_context; - /* We track the current MI_SET_CONTEXT in order to eliminate - * redudant context switches. This presumes that requests are not - * reordered! Or when they are the tracking is updated along with - * the emission of individual requests into the legacy command - * stream (ring). - */ - struct i915_gem_context *legacy_active_context; - struct i915_hw_ppgtt *legacy_active_ppgtt; - /* status_notifier: list of callbacks for context-switch changes */ struct atomic_notifier_head context_status_notifier;