@@ -7,6 +7,7 @@
#include <linux/iommu.h>
#include <linux/list.h>
+#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
@@ -76,8 +77,65 @@ static int iopf_complete(struct device *dev, struct iopf_fault *iopf,
static enum iommu_page_response_code
iopf_handle_single(struct iopf_fault *iopf)
{
- /* TODO */
- return -ENODEV;
+ vm_fault_t ret;
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ unsigned int access_flags = 0;
+ unsigned int fault_flags = FAULT_FLAG_REMOTE;
+ struct iommu_fault_page_request *prm = &iopf->fault.prm;
+ enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;
+
+ if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
+ return status;
+
+ mm = iommu_sva_find(prm->pasid);
+ if (IS_ERR_OR_NULL(mm))
+ return status;
+
+ down_read(&mm->mmap_sem);
+
+ vma = find_extend_vma(mm, prm->addr);
+ if (!vma)
+ /* Unmapped area */
+ goto out_put_mm;
+
+ if (prm->perm & IOMMU_FAULT_PERM_READ)
+ access_flags |= VM_READ;
+
+ if (prm->perm & IOMMU_FAULT_PERM_WRITE) {
+ access_flags |= VM_WRITE;
+ fault_flags |= FAULT_FLAG_WRITE;
+ }
+
+ if (prm->perm & IOMMU_FAULT_PERM_EXEC) {
+ access_flags |= VM_EXEC;
+ fault_flags |= FAULT_FLAG_INSTRUCTION;
+ }
+
+ if (!(prm->perm & IOMMU_FAULT_PERM_PRIV))
+ fault_flags |= FAULT_FLAG_USER;
+
+ if (access_flags & ~vma->vm_flags)
+ /* Access fault */
+ goto out_put_mm;
+
+ ret = handle_mm_fault(vma, prm->addr, fault_flags);
+ status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID :
+ IOMMU_PAGE_RESP_SUCCESS;
+
+out_put_mm:
+ up_read(&mm->mmap_sem);
+
+ /*
+ * If the process exits while we're handling the fault on its mm, we
+ * can't do mmput(). exit_mmap() would release the MMU notifier, calling
+ * iommu_notifier_release(), which has to flush the fault queue that
+ * we're executing on... So mmput_async() moves the release of the mm to
+ * another thread, if we're the last user.
+ */
+ mmput_async(mm);
+
+ return status;
}
static void iopf_handle_group(struct work_struct *work)
@@ -111,6 +169,30 @@ static void iopf_handle_group(struct work_struct *work)
*
* Add a fault to the device workqueue, to be handled by mm.
*
+ * This module doesn't handle PCI PASID Stop Marker; IOMMU drivers must discard
+ * them before reporting faults. A PASID Stop Marker (LRW = 0b100) doesn't
+ * expect a response. It may be generated when disabling a PASID (issuing a
+ * PASID stop request) by some PCI devices.
+ *
+ * The PASID stop request is triggered by the mm_exit() callback. When the
+ * callback returns from the device driver, no page request is generated for
+ * this PASID anymore and outstanding ones have been pushed to the IOMMU (as per
+ * PCIe 4.0r1.0 - 6.20.1 and 10.4.1.2 - Managing PASID TLP Prefix Usage). Some
+ * PCI devices will wait for all outstanding page requests to come back with a
+ * response before completing the PASID stop request. Others do not wait for
+ * page responses, and instead issue this Stop Marker that tells us when the
+ * PASID can be reallocated.
+ *
+ * It is safe to discard the Stop Marker because it is an optimization.
+ * a. Page requests, which are posted requests, have been flushed to the IOMMU
+ * when mm_exit() returns,
+ * b. We flush all fault queues after mm_exit() returns and before freeing the
+ * PASID.
+ *
+ * So even though the Stop Marker might be issued by the device *after* the stop
+ * request completes, outstanding faults will have been dealt with by the time
+ * we free the PASID.
+ *
* Return: 0 on success and <0 on error.
*/
int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)