Message ID | 20220126165845.22813-2-cascardo@canonical.com |
---|---|
State | New |
Headers | show |
Series | CVE-2022-0330 | expand |
On 26.01.22 17:58, Thadeu Lima de Souza Cascardo wrote: > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > We need to flush TLBs before releasing backing store otherwise userspace > is able to encounter stale entries if a) it is not declaring GPU access to > certain buffers and b) this GPU execution then races with the backing > store release getting triggered asynchronously. > > Approach taken is to mark any buffer objects which were ever bound to the > GPU and triggering a serialized TLB flush when their backing store is > released. > > Alternatively the flushing could be done on VMA unbind, at which point we > would be able to ascertain whether there is potential parallel GPU > execution (which could race), but choice essentially boils down to paying > the cost of TLB flushes maybe needlessly at VMA unbind time (when the > backing store is not known to be definitely going away, so flushing not > always required for safety), versus potentially needlessly at backing > store release time since at that point cannot tell whether there is a > parallel GPU execution happening. > > Therefore simplicity of implementation has been chosen for now, with scope > to benchmark and refine later as required. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Reported-by: Sushma Venkatesh Reddy <sushma.venkatesh.reddy@intel.com> > Cc: Daniel Vetter <daniel.vetter@ffwll.ch> > Cc: Jon Bloomfield <jon.bloomfield@intel.com> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> > Cc: Jani Nikula <jani.nikula@intel.com> > Cc: stable@vger.kernel.org > (backported from commit 7938d61591d33394a21bdd7797a245b65428f44c) > [cascardo: conflict cleanup because of extra i915_gemfs_init] > CVE-2022-0330 > Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com> > --- Applied to bionic:linux/master-next. Thanks. 
-Stefan > drivers/gpu/drm/i915/i915_drv.h | 2 + > drivers/gpu/drm/i915/i915_gem.c | 84 +++++++++++++++++++++++++- > drivers/gpu/drm/i915/i915_gem_object.h | 1 + > drivers/gpu/drm/i915/i915_reg.h | 6 ++ > drivers/gpu/drm/i915/i915_vma.c | 4 ++ > 5 files changed, 96 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index a7b32ee8549e..0efcb09c3306 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2245,6 +2245,8 @@ struct drm_i915_private { > > struct intel_uncore uncore; > > + struct mutex tlb_invalidate_lock; > + > struct i915_virtual_gpu vgpu; > > struct intel_gvt *gvt; > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 8356adc9a67a..32974cf96f73 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -2284,6 +2284,76 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) > rcu_read_unlock(); > } > > +struct reg_and_bit { > + i915_reg_t reg; > + u32 bit; > +}; > + > +static struct reg_and_bit > +get_reg_and_bit(const struct intel_engine_cs *engine, > + const i915_reg_t *regs, const unsigned int num) > +{ > + const unsigned int class = engine->class; > + struct reg_and_bit rb = { .bit = 1 }; > + > + if (WARN_ON_ONCE(class >= num || !regs[class].reg)) > + return rb; > + > + rb.reg = regs[class]; > + if (class == VIDEO_DECODE_CLASS) > + rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */ > + > + return rb; > +} > + > +static void invalidate_tlbs(struct drm_i915_private *dev_priv) > +{ > + static const i915_reg_t gen8_regs[] = { > + [RENDER_CLASS] = GEN8_RTCR, > + [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */ > + [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR, > + [COPY_ENGINE_CLASS] = GEN8_BTCR, > + }; > + const unsigned int num = ARRAY_SIZE(gen8_regs); > + const i915_reg_t *regs = gen8_regs; > + struct intel_engine_cs *engine; > + enum intel_engine_id id; > + > + 
if (INTEL_GEN(dev_priv) < 8) > + return; > + > + assert_rpm_wakelock_held(dev_priv); > + > + mutex_lock(&dev_priv->tlb_invalidate_lock); > + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); > + > + for_each_engine(engine, dev_priv, id) { > + /* > + * HW architecture suggest typical invalidation time at 40us, > + * with pessimistic cases up to 100us and a recommendation to > + * cap at 1ms. We go a bit higher just in case. > + */ > + const unsigned int timeout_us = 100; > + const unsigned int timeout_ms = 4; > + struct reg_and_bit rb; > + > + rb = get_reg_and_bit(engine, regs, num); > + if (!i915_mmio_reg_offset(rb.reg)) > + continue; > + > + I915_WRITE_FW(rb.reg, rb.bit); > + if (__intel_wait_for_register_fw(dev_priv, > + rb.reg, rb.bit, 0, > + timeout_us, timeout_ms, > + NULL)) > + DRM_ERROR_RATELIMITED("%s TLB invalidation did not complete in %ums!\n", > + engine->name, timeout_ms); > + } > + > + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); > + mutex_unlock(&dev_priv->tlb_invalidate_lock); > +} > + > void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, > enum i915_mm_subclass subclass) > { > @@ -2326,8 +2396,18 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, > > __i915_gem_object_reset_page_iter(obj); > > - if (!IS_ERR(pages)) > + if (!IS_ERR(pages)) { > + if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) { > + struct drm_i915_private *i915 = to_i915(obj->base.dev); > + > + if (intel_runtime_pm_get_if_in_use(i915)) { > + invalidate_tlbs(i915); > + intel_runtime_pm_put(i915); > + } > + } > + > obj->ops->put_pages(obj, pages); > + } > > obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; > > @@ -5288,6 +5368,8 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) > > spin_lock_init(&dev_priv->fb_tracking.lock); > > + mutex_init(&dev_priv->tlb_invalidate_lock); > + > err = i915_gemfs_init(dev_priv); > if (err) > DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", 
err); > diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h > index 6b1de7942349..e3c0a897ca19 100644 > --- a/drivers/gpu/drm/i915/i915_gem_object.h > +++ b/drivers/gpu/drm/i915/i915_gem_object.h > @@ -135,6 +135,7 @@ struct drm_i915_gem_object { > * activity? > */ > #define I915_BO_ACTIVE_REF 0 > +#define I915_BO_WAS_BOUND_BIT 1 > > /* > * Is the object to be mapped as read-only to the GPU > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index 814b4c66c6e3..ae1f77e3a824 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -2384,6 +2384,12 @@ enum i915_power_well_id { > #define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1<<28) > #define GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT (1<<24) > > +#define GEN8_RTCR _MMIO(0x4260) > +#define GEN8_M1TCR _MMIO(0x4264) > +#define GEN8_M2TCR _MMIO(0x4268) > +#define GEN8_BTCR _MMIO(0x426c) > +#define GEN8_VTCR _MMIO(0x4270) > + > #if 0 > #define PRB0_TAIL _MMIO(0x2030) > #define PRB0_HEAD _MMIO(0x2034) > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c > index 7c4ab4263d33..19475637ba2d 100644 > --- a/drivers/gpu/drm/i915/i915_vma.c > +++ b/drivers/gpu/drm/i915/i915_vma.c > @@ -283,6 +283,10 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, > return ret; > > vma->flags |= bind_flags; > + > + if (vma->obj) > + set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags); > + > return 0; > } >
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a7b32ee8549e..0efcb09c3306 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2245,6 +2245,8 @@ struct drm_i915_private { struct intel_uncore uncore; + struct mutex tlb_invalidate_lock; + struct i915_virtual_gpu vgpu; struct intel_gvt *gvt; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8356adc9a67a..32974cf96f73 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2284,6 +2284,76 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) rcu_read_unlock(); } +struct reg_and_bit { + i915_reg_t reg; + u32 bit; +}; + +static struct reg_and_bit +get_reg_and_bit(const struct intel_engine_cs *engine, + const i915_reg_t *regs, const unsigned int num) +{ + const unsigned int class = engine->class; + struct reg_and_bit rb = { .bit = 1 }; + + if (WARN_ON_ONCE(class >= num || !regs[class].reg)) + return rb; + + rb.reg = regs[class]; + if (class == VIDEO_DECODE_CLASS) + rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */ + + return rb; +} + +static void invalidate_tlbs(struct drm_i915_private *dev_priv) +{ + static const i915_reg_t gen8_regs[] = { + [RENDER_CLASS] = GEN8_RTCR, + [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */ + [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR, + [COPY_ENGINE_CLASS] = GEN8_BTCR, + }; + const unsigned int num = ARRAY_SIZE(gen8_regs); + const i915_reg_t *regs = gen8_regs; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + if (INTEL_GEN(dev_priv) < 8) + return; + + assert_rpm_wakelock_held(dev_priv); + + mutex_lock(&dev_priv->tlb_invalidate_lock); + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + for_each_engine(engine, dev_priv, id) { + /* + * HW architecture suggest typical invalidation time at 40us, + * with pessimistic cases up to 100us and a recommendation to + * cap at 1ms. We go a bit higher just in case. 
+ */ + const unsigned int timeout_us = 100; + const unsigned int timeout_ms = 4; + struct reg_and_bit rb; + + rb = get_reg_and_bit(engine, regs, num); + if (!i915_mmio_reg_offset(rb.reg)) + continue; + + I915_WRITE_FW(rb.reg, rb.bit); + if (__intel_wait_for_register_fw(dev_priv, + rb.reg, rb.bit, 0, + timeout_us, timeout_ms, + NULL)) + DRM_ERROR_RATELIMITED("%s TLB invalidation did not complete in %ums!\n", + engine->name, timeout_ms); + } + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + mutex_unlock(&dev_priv->tlb_invalidate_lock); +} + void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, enum i915_mm_subclass subclass) { @@ -2326,8 +2396,18 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, __i915_gem_object_reset_page_iter(obj); - if (!IS_ERR(pages)) + if (!IS_ERR(pages)) { + if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + + if (intel_runtime_pm_get_if_in_use(i915)) { + invalidate_tlbs(i915); + intel_runtime_pm_put(i915); + } + } + obj->ops->put_pages(obj, pages); + } obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; @@ -5288,6 +5368,8 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) spin_lock_init(&dev_priv->fb_tracking.lock); + mutex_init(&dev_priv->tlb_invalidate_lock); + err = i915_gemfs_init(dev_priv); if (err) DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err); diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 6b1de7942349..e3c0a897ca19 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -135,6 +135,7 @@ struct drm_i915_gem_object { * activity? 
*/ #define I915_BO_ACTIVE_REF 0 +#define I915_BO_WAS_BOUND_BIT 1 /* * Is the object to be mapped as read-only to the GPU diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 814b4c66c6e3..ae1f77e3a824 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2384,6 +2384,12 @@ enum i915_power_well_id { #define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1<<28) #define GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT (1<<24) +#define GEN8_RTCR _MMIO(0x4260) +#define GEN8_M1TCR _MMIO(0x4264) +#define GEN8_M2TCR _MMIO(0x4268) +#define GEN8_BTCR _MMIO(0x426c) +#define GEN8_VTCR _MMIO(0x4270) + #if 0 #define PRB0_TAIL _MMIO(0x2030) #define PRB0_HEAD _MMIO(0x2034) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 7c4ab4263d33..19475637ba2d 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -283,6 +283,10 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, return ret; vma->flags |= bind_flags; + + if (vma->obj) + set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags); + return 0; }