Message ID | 20220126165845.22813-6-cascardo@canonical.com |
---|---|
State | New |
Headers | show |
Series | [SRU,Impish] drm/i915: Flush TLBs before releasing backing store | expand |
On 26.01.22 17:58, Thadeu Lima de Souza Cascardo wrote: > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > We need to flush TLBs before releasing backing store otherwise userspace > is able to encounter stale entries if a) it is not declaring GPU access to > certain buffers and b) this GPU execution then races with the backing > store release getting triggered asynchronously. > > Approach taken is to mark any buffer objects which were ever bound to the > GPU and triggering a serialized TLB flush when their backing store is > released. > > Alternatively the flushing could be done on VMA unbind, at which point we > would be able to ascertain whether there is potential parallel GPU > execution (which could race), but choice essentially boils down to paying > the cost of TLB flushes maybe needlessly at VMA unbind time (when the > backing store is not known to be definitely going away, so flushing not > always required for safety), versus potentially needlessly at backing > store relase time since at that point cannot tell whether there is a > parallel GPU execution happening. > > Therefore simplicity of implementation has been chosen for now, with scope > to benchmark and refine later as required. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Reported-by: Sushma Venkatesh Reddy <sushma.venkatesh.reddy@intel.com> > Cc: Daniel Vetter <daniel.vetter@ffwll.ch> > Cc: Jon Bloomfield <jon.bloomfield@intel.com> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> > Cc: Jani Nikula <jani.nikula@intel.com> > Cc: stable@vger.kernel.org > (backported from commit 7938d61591d33394a21bdd7797a245b65428f44c) > [cascardo: use 6 for I915_BO_WAS_BOUND_BIT] > [cascardo: use INTEL_GEN instead of GRAPHICS_VER] > CVE-2022-0330 > Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com> > --- Applied to impish:linux/master-next. Thanks. -Stefan > .../gpu/drm/i915/gem/i915_gem_object_types.h | 1 + > drivers/gpu/drm/i915/gem/i915_gem_pages.c | 10 ++ > drivers/gpu/drm/i915/gt/intel_gt.c | 102 ++++++++++++++++++ > drivers/gpu/drm/i915/gt/intel_gt.h | 2 + > drivers/gpu/drm/i915/gt/intel_gt_types.h | 2 + > drivers/gpu/drm/i915/i915_reg.h | 11 ++ > drivers/gpu/drm/i915/i915_vma.c | 3 + > drivers/gpu/drm/i915/intel_uncore.c | 26 ++++- > drivers/gpu/drm/i915/intel_uncore.h | 2 + > 9 files changed, 155 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h > index 8e485cb3343c..ca4305b1b98f 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h > @@ -177,6 +177,7 @@ struct drm_i915_gem_object { > I915_BO_ALLOC_STRUCT_PAGE) > #define I915_BO_READONLY BIT(3) > #define I915_TILING_QUIRK_BIT 4 /* unknown swizzling; do not release! */ > +#define I915_BO_WAS_BOUND_BIT 6 > > /* > * Is the object to be mapped as read-only to the GPU > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c > index 7361971c177d..f5a7a53f379b 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c > @@ -10,6 +10,8 @@ > #include "i915_gem_lmem.h" > #include "i915_gem_mman.h" > > +#include "gt/intel_gt.h" > + > void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, > struct sg_table *pages, > unsigned int sg_page_sizes) > @@ -218,6 +220,14 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) > __i915_gem_object_reset_page_iter(obj); > obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; > > + if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) { > + struct drm_i915_private *i915 = to_i915(obj->base.dev); > + intel_wakeref_t wakeref; > + > + with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref) > + intel_gt_invalidate_tlbs(&i915->gt); > + } > + > return pages; > } > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c > index 8d77dcbad059..0354927bba74 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt.c > +++ b/drivers/gpu/drm/i915/gt/intel_gt.c > @@ -28,6 +28,8 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) > > spin_lock_init(>->irq_lock); > > + mutex_init(>->tlb_invalidate_lock); > + > INIT_LIST_HEAD(>->closed_vma); > spin_lock_init(>->closed_lock); > > @@ -705,3 +707,103 @@ void intel_gt_info_print(const struct intel_gt_info *info, > > intel_sseu_dump(&info->sseu, p); > } > + > +struct reg_and_bit { > + i915_reg_t reg; > + u32 bit; > +}; > + > +static struct reg_and_bit > +get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8, > + const i915_reg_t *regs, const unsigned int num) > +{ > + const unsigned int class = engine->class; > + struct reg_and_bit rb = { }; > + > + if (drm_WARN_ON_ONCE(&engine->i915->drm, > + class >= num || !regs[class].reg)) > + return rb; > + > + rb.reg = regs[class]; > + if (gen8 && class == VIDEO_DECODE_CLASS) > + rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */ > + else > + rb.bit = engine->instance; > + > + rb.bit = BIT(rb.bit); > + > + return rb; > +} > + > +void intel_gt_invalidate_tlbs(struct intel_gt *gt) > +{ > + static const i915_reg_t gen8_regs[] = { > + [RENDER_CLASS] = GEN8_RTCR, > + [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */ > + [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR, > + [COPY_ENGINE_CLASS] = GEN8_BTCR, > + }; > + static const i915_reg_t gen12_regs[] = { > + [RENDER_CLASS] = GEN12_GFX_TLB_INV_CR, > + [VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR, > + [VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR, > + [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR, > + }; > + struct drm_i915_private *i915 = gt->i915; > + struct intel_uncore *uncore = gt->uncore; > + struct intel_engine_cs *engine; > + enum intel_engine_id id; > + const i915_reg_t *regs; > + unsigned int num = 0; > + > + if (I915_SELFTEST_ONLY(gt->awake == -ENODEV)) > + return; > + > + if (INTEL_GEN(i915) == 12) { > + regs = gen12_regs; > + num = ARRAY_SIZE(gen12_regs); > + } else if (INTEL_GEN(i915) >= 8 && INTEL_GEN(i915) <= 11) { > + regs = gen8_regs; > + num = ARRAY_SIZE(gen8_regs); > + } else if (INTEL_GEN(i915) < 8) { > + return; > + } > + > + if (drm_WARN_ONCE(&i915->drm, !num, > + "Platform does not implement TLB invalidation!")) > + return; > + > + GEM_TRACE("\n"); > + > + assert_rpm_wakelock_held(&i915->runtime_pm); > + > + mutex_lock(>->tlb_invalidate_lock); > + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); > + > + for_each_engine(engine, gt, id) { > + /* > + * HW architecture suggest typical invalidation time at 40us, > + * with pessimistic cases up to 100us and a recommendation to > + * cap at 1ms. We go a bit higher just in case. > + */ > + const unsigned int timeout_us = 100; > + const unsigned int timeout_ms = 4; > + struct reg_and_bit rb; > + > + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); > + if (!i915_mmio_reg_offset(rb.reg)) > + continue; > + > + intel_uncore_write_fw(uncore, rb.reg, rb.bit); > + if (__intel_wait_for_register_fw(uncore, > + rb.reg, rb.bit, 0, > + timeout_us, timeout_ms, > + NULL)) > + drm_err_ratelimited(>->i915->drm, > + "%s TLB invalidation did not complete in %ums!\n", > + engine->name, timeout_ms); > + } > + > + intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL); > + mutex_unlock(>->tlb_invalidate_lock); > +} > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h > index 7ec395cace69..5d090b338e77 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt.h > +++ b/drivers/gpu/drm/i915/gt/intel_gt.h > @@ -80,4 +80,6 @@ void intel_gt_info_print(const struct intel_gt_info *info, > > void intel_gt_watchdog_work(struct work_struct *work); > > +void intel_gt_invalidate_tlbs(struct intel_gt *gt); > + > #endif /* __INTEL_GT_H__ */ > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h > index 0caf6ca0a784..e461b38042a8 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h > +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h > @@ -38,6 +38,8 @@ struct intel_gt { > > struct intel_uc uc; > > + struct mutex tlb_invalidate_lock; > + > struct intel_gt_timelines { > spinlock_t lock; /* protects active_list */ > struct list_head active_list; > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index dc801fe149c4..f4e24f65bfb5 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -2646,6 +2646,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) > #define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1 << 28) > #define GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT (1 << 24) > > +#define GEN8_RTCR _MMIO(0x4260) > +#define GEN8_M1TCR _MMIO(0x4264) > +#define GEN8_M2TCR _MMIO(0x4268) > +#define GEN8_BTCR _MMIO(0x426c) > +#define GEN8_VTCR _MMIO(0x4270) > + > #if 0 > #define PRB0_TAIL _MMIO(0x2030) > #define PRB0_HEAD _MMIO(0x2034) > @@ -2735,6 +2741,11 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) > #define FAULT_VA_HIGH_BITS (0xf << 0) > #define FAULT_GTT_SEL (1 << 4) > > +#define GEN12_GFX_TLB_INV_CR _MMIO(0xced8) > +#define GEN12_VD_TLB_INV_CR _MMIO(0xcedc) > +#define GEN12_VE_TLB_INV_CR _MMIO(0xcee0) > +#define GEN12_BLT_TLB_INV_CR _MMIO(0xcee4) > + > #define GEN12_AUX_ERR_DBG _MMIO(0x43f4) > > #define FPGA_DBG _MMIO(0x42300) > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c > index 07490db51cdc..60cc0c37b1c4 100644 > --- a/drivers/gpu/drm/i915/i915_vma.c > +++ b/drivers/gpu/drm/i915/i915_vma.c > @@ -439,6 +439,9 @@ int i915_vma_bind(struct i915_vma *vma, > vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags); > } > > + if (vma->obj) > + set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags); > + > atomic_or(bind_flags, &vma->flags); > return 0; > } > diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c > index 661b50191f2b..5e198b36a586 100644 > --- a/drivers/gpu/drm/i915/intel_uncore.c > +++ b/drivers/gpu/drm/i915/intel_uncore.c > @@ -710,7 +710,8 @@ void intel_uncore_forcewake_get__locked(struct intel_uncore *uncore, > } > > static void __intel_uncore_forcewake_put(struct intel_uncore *uncore, > - enum forcewake_domains fw_domains) > + enum forcewake_domains fw_domains, > + bool delayed) > { > struct intel_uncore_forcewake_domain *domain; > unsigned int tmp; > @@ -725,7 +726,11 @@ static void __intel_uncore_forcewake_put(struct intel_uncore *uncore, > continue; > } > > - uncore->funcs.force_wake_put(uncore, domain->mask); > + if (delayed && > + !(domain->uncore->fw_domains_timer & domain->mask)) > + fw_domain_arm_timer(domain); > + else > + uncore->funcs.force_wake_put(uncore, domain->mask); > } > } > > @@ -746,7 +751,20 @@ void intel_uncore_forcewake_put(struct intel_uncore *uncore, > return; > > spin_lock_irqsave(&uncore->lock, irqflags); > - __intel_uncore_forcewake_put(uncore, fw_domains); > + __intel_uncore_forcewake_put(uncore, fw_domains, false); > + spin_unlock_irqrestore(&uncore->lock, irqflags); > +} > + > +void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore, > + enum forcewake_domains fw_domains) > +{ > + unsigned long irqflags; > + > + if (!uncore->funcs.force_wake_put) > + return; > + > + spin_lock_irqsave(&uncore->lock, irqflags); > + __intel_uncore_forcewake_put(uncore, fw_domains, true); > spin_unlock_irqrestore(&uncore->lock, irqflags); > } > > @@ -788,7 +806,7 @@ void intel_uncore_forcewake_put__locked(struct intel_uncore *uncore, > if (!uncore->funcs.force_wake_put) > return; > > - __intel_uncore_forcewake_put(uncore, fw_domains); > + __intel_uncore_forcewake_put(uncore, fw_domains, false); > } > > void assert_forcewakes_inactive(struct intel_uncore *uncore) > diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h > index 59f0da8f1fbb..1a8e1017f936 100644 > --- a/drivers/gpu/drm/i915/intel_uncore.h > +++ b/drivers/gpu/drm/i915/intel_uncore.h > @@ -211,6 +211,8 @@ void intel_uncore_forcewake_get(struct intel_uncore *uncore, > enum forcewake_domains domains); > void intel_uncore_forcewake_put(struct intel_uncore *uncore, > enum forcewake_domains domains); > +void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore, > + enum forcewake_domains domains); > void intel_uncore_forcewake_flush(struct intel_uncore *uncore, > enum forcewake_domains fw_domains); >
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 8e485cb3343c..ca4305b1b98f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -177,6 +177,7 @@ struct drm_i915_gem_object { I915_BO_ALLOC_STRUCT_PAGE) #define I915_BO_READONLY BIT(3) #define I915_TILING_QUIRK_BIT 4 /* unknown swizzling; do not release! */ +#define I915_BO_WAS_BOUND_BIT 6 /* * Is the object to be mapped as read-only to the GPU diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 7361971c177d..f5a7a53f379b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -10,6 +10,8 @@ #include "i915_gem_lmem.h" #include "i915_gem_mman.h" +#include "gt/intel_gt.h" + void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, struct sg_table *pages, unsigned int sg_page_sizes) @@ -218,6 +220,14 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) __i915_gem_object_reset_page_iter(obj); obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; + if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + intel_wakeref_t wakeref; + + with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref) + intel_gt_invalidate_tlbs(&i915->gt); + } + return pages; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 8d77dcbad059..0354927bba74 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -28,6 +28,8 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) spin_lock_init(>->irq_lock); + mutex_init(>->tlb_invalidate_lock); + INIT_LIST_HEAD(>->closed_vma); spin_lock_init(>->closed_lock); @@ -705,3 +707,103 @@ void intel_gt_info_print(const struct intel_gt_info *info, intel_sseu_dump(&info->sseu, p); } + +struct reg_and_bit { + i915_reg_t reg; + u32 bit; +}; + +static struct reg_and_bit +get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8, + const i915_reg_t *regs, const unsigned int num) +{ + const unsigned int class = engine->class; + struct reg_and_bit rb = { }; + + if (drm_WARN_ON_ONCE(&engine->i915->drm, + class >= num || !regs[class].reg)) + return rb; + + rb.reg = regs[class]; + if (gen8 && class == VIDEO_DECODE_CLASS) + rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */ + else + rb.bit = engine->instance; + + rb.bit = BIT(rb.bit); + + return rb; +} + +void intel_gt_invalidate_tlbs(struct intel_gt *gt) +{ + static const i915_reg_t gen8_regs[] = { + [RENDER_CLASS] = GEN8_RTCR, + [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */ + [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR, + [COPY_ENGINE_CLASS] = GEN8_BTCR, + }; + static const i915_reg_t gen12_regs[] = { + [RENDER_CLASS] = GEN12_GFX_TLB_INV_CR, + [VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR, + [VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR, + [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR, + }; + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + struct intel_engine_cs *engine; + enum intel_engine_id id; + const i915_reg_t *regs; + unsigned int num = 0; + + if (I915_SELFTEST_ONLY(gt->awake == -ENODEV)) + return; + + if (INTEL_GEN(i915) == 12) { + regs = gen12_regs; + num = ARRAY_SIZE(gen12_regs); + } else if (INTEL_GEN(i915) >= 8 && INTEL_GEN(i915) <= 11) { + regs = gen8_regs; + num = ARRAY_SIZE(gen8_regs); + } else if (INTEL_GEN(i915) < 8) { + return; + } + + if (drm_WARN_ONCE(&i915->drm, !num, + "Platform does not implement TLB invalidation!")) + return; + + GEM_TRACE("\n"); + + assert_rpm_wakelock_held(&i915->runtime_pm); + + mutex_lock(>->tlb_invalidate_lock); + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + for_each_engine(engine, gt, id) { + /* + * HW architecture suggest typical invalidation time at 40us, + * with pessimistic cases up to 100us and a recommendation to + * cap at 1ms. We go a bit higher just in case. + */ + const unsigned int timeout_us = 100; + const unsigned int timeout_ms = 4; + struct reg_and_bit rb; + + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); + if (!i915_mmio_reg_offset(rb.reg)) + continue; + + intel_uncore_write_fw(uncore, rb.reg, rb.bit); + if (__intel_wait_for_register_fw(uncore, + rb.reg, rb.bit, 0, + timeout_us, timeout_ms, + NULL)) + drm_err_ratelimited(>->i915->drm, + "%s TLB invalidation did not complete in %ums!\n", + engine->name, timeout_ms); + } + + intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL); + mutex_unlock(>->tlb_invalidate_lock); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 7ec395cace69..5d090b338e77 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -80,4 +80,6 @@ void intel_gt_info_print(const struct intel_gt_info *info, void intel_gt_watchdog_work(struct work_struct *work); +void intel_gt_invalidate_tlbs(struct intel_gt *gt); + #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 0caf6ca0a784..e461b38042a8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -38,6 +38,8 @@ struct intel_gt { struct intel_uc uc; + struct mutex tlb_invalidate_lock; + struct intel_gt_timelines { spinlock_t lock; /* protects active_list */ struct list_head active_list; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index dc801fe149c4..f4e24f65bfb5 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2646,6 +2646,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1 << 28) #define GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT (1 << 24) +#define GEN8_RTCR _MMIO(0x4260) +#define GEN8_M1TCR _MMIO(0x4264) +#define GEN8_M2TCR _MMIO(0x4268) +#define GEN8_BTCR _MMIO(0x426c) +#define GEN8_VTCR _MMIO(0x4270) + #if 0 #define PRB0_TAIL _MMIO(0x2030) #define PRB0_HEAD _MMIO(0x2034) @@ -2735,6 +2741,11 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define FAULT_VA_HIGH_BITS (0xf << 0) #define FAULT_GTT_SEL (1 << 4) +#define GEN12_GFX_TLB_INV_CR _MMIO(0xced8) +#define GEN12_VD_TLB_INV_CR _MMIO(0xcedc) +#define GEN12_VE_TLB_INV_CR _MMIO(0xcee0) +#define GEN12_BLT_TLB_INV_CR _MMIO(0xcee4) + #define GEN12_AUX_ERR_DBG _MMIO(0x43f4) #define FPGA_DBG _MMIO(0x42300) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 07490db51cdc..60cc0c37b1c4 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -439,6 +439,9 @@ int i915_vma_bind(struct i915_vma *vma, vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags); } + if (vma->obj) + set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags); + atomic_or(bind_flags, &vma->flags); return 0; } diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 661b50191f2b..5e198b36a586 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -710,7 +710,8 @@ void intel_uncore_forcewake_get__locked(struct intel_uncore *uncore, } static void __intel_uncore_forcewake_put(struct intel_uncore *uncore, - enum forcewake_domains fw_domains) + enum forcewake_domains fw_domains, + bool delayed) { struct intel_uncore_forcewake_domain *domain; unsigned int tmp; @@ -725,7 +726,11 @@ static void __intel_uncore_forcewake_put(struct intel_uncore *uncore, continue; } - uncore->funcs.force_wake_put(uncore, domain->mask); + if (delayed && + !(domain->uncore->fw_domains_timer & domain->mask)) + fw_domain_arm_timer(domain); + else + uncore->funcs.force_wake_put(uncore, domain->mask); } } @@ -746,7 +751,20 @@ void intel_uncore_forcewake_put(struct intel_uncore *uncore, return; spin_lock_irqsave(&uncore->lock, irqflags); - __intel_uncore_forcewake_put(uncore, fw_domains); + __intel_uncore_forcewake_put(uncore, fw_domains, false); + spin_unlock_irqrestore(&uncore->lock, irqflags); +} + +void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore, + enum forcewake_domains fw_domains) +{ + unsigned long irqflags; + + if (!uncore->funcs.force_wake_put) + return; + + spin_lock_irqsave(&uncore->lock, irqflags); + __intel_uncore_forcewake_put(uncore, fw_domains, true); spin_unlock_irqrestore(&uncore->lock, irqflags); } @@ -788,7 +806,7 @@ void intel_uncore_forcewake_put__locked(struct intel_uncore *uncore, if (!uncore->funcs.force_wake_put) return; - __intel_uncore_forcewake_put(uncore, fw_domains); + __intel_uncore_forcewake_put(uncore, fw_domains, false); } void assert_forcewakes_inactive(struct intel_uncore *uncore) diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 59f0da8f1fbb..1a8e1017f936 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -211,6 +211,8 @@ void intel_uncore_forcewake_get(struct intel_uncore *uncore, enum forcewake_domains domains); void intel_uncore_forcewake_put(struct intel_uncore *uncore, enum forcewake_domains domains); +void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore, + enum forcewake_domains domains); void intel_uncore_forcewake_flush(struct intel_uncore *uncore, enum forcewake_domains fw_domains);