Message ID | 1428921530-28556-1-git-send-email-acourbot@nvidia.com |
---|---|
State | Deferred |
Headers | show |
Ben, I guess our main remaining concern with this patch is how it should integrate wrt. the existing PMU code. Since it is designed to interact with the NVIDIA firmware, maybe we should use a different base code, or do you think we can somehow share code and data structures? Deepak, as explained in the review, I think your time is better spent making progress on (and starting submission of) the secure boot code. Hopefully this patch starts to be in good shape, so I will take care of it from now on. Thanks! On 04/13/2015 07:38 PM, Alexandre Courbot wrote: > From: Deepak Goyal <dgoyal@nvidia.com> > > - Maps PMU firmware into PMU virtual memory. > - Copy bootloader into PMU memory and start it. > - Allow the PMU to interact with HOST via interrupts. > > PMU after successful configurations (to follow after this patch) will: > 1.Autonomously power gate graphics engine when not in use.It will save > us a lot of power. > 2.Provide better way to scale frequencies by reporting Perf counters. > 3.Be critical for GPU functionality as future GPUs secure some register > & mem accesses involved in context switch. > > Signed-off-by: Deepak Goyal <dgoyal@nvidia.com> > Signed-off-by: Alexandre Courbot <acourbot@nvidia.com> > --- > Changes since v3: > - Cleaned formatting, renamed variables according to Nouveau standards > - (Hopefully) fixed init/fini sequence > - Removed a few more unneeded variables > > drm/nouveau/nvkm/subdev/pmu/gk20a.c | 803 +++++++++++++++++++++++++++++++++++- > 1 file changed, 785 insertions(+), 18 deletions(-) > > diff --git a/drm/nouveau/nvkm/subdev/pmu/gk20a.c b/drm/nouveau/nvkm/subdev/pmu/gk20a.c > index 594f746e68f2..c206ec5e558a 100644 > --- a/drm/nouveau/nvkm/subdev/pmu/gk20a.c > +++ b/drm/nouveau/nvkm/subdev/pmu/gk20a.c > @@ -1,5 +1,5 @@ > /* > - * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. > + * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. 
> * > * Permission is hereby granted, free of charge, to any person obtaining a > * copy of this software and associated documentation files (the "Software"), > @@ -19,14 +19,186 @@ > * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > * DEALINGS IN THE SOFTWARE. > */ > -#include "priv.h" > > +#include "priv.h" > +#include <core/client.h> > +#include <core/gpuobj.h> > +#include <subdev/bar.h> > +#include <subdev/fb.h> > +#include <subdev/mc.h> > +#include <subdev/timer.h> > +#include <subdev/mmu.h> > +#include <subdev/pmu.h> > +#include <core/object.h> > +#include <core/device.h> > +#include <linux/delay.h> > +#include <linux/firmware.h> > #include <subdev/clk.h> > #include <subdev/timer.h> > #include <subdev/volt.h> > > +#define APP_VERSION_GK20A 17997577 > +#define GK20A_PMU_UCODE_SIZE_MAX (256 * 1024) > +#define PMU_QUEUE_COUNT 5 > + > +#define GK20A_PMU_TRACE_BUFSIZE 0x4000 /* 4K */ > +#define GK20A_PMU_DMEM_BLKSIZE2 8 > +#define GK20A_PMU_UCODE_NB_MAX_OVERLAY 32 > +#define GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH 64 > + > +#define PMU_UNIT_REWIND (0x00) > +#define PMU_UNIT_PG (0x03) > +#define PMU_UNIT_INIT (0x07) > +#define PMU_UNIT_PERFMON (0x12) > +#define PMU_UNIT_THERM (0x1B) > +#define PMU_UNIT_RC (0x1F) > +#define PMU_UNIT_NULL (0x20) > +#define PMU_UNIT_END (0x23) > +#define PMU_UNIT_TEST_START (0xFE) > +#define PMU_UNIT_END_SIM (0xFF) > +#define PMU_UNIT_TEST_END (0xFF) > + > +#define PMU_UNIT_ID_IS_VALID(id) \ > + (((id) < PMU_UNIT_END) || ((id) >= PMU_UNIT_TEST_START)) > +#define PMU_DMEM_ALIGNMENT (4) > + > #define BUSY_SLOT 0 > #define CLK_SLOT 7 > +#define GK20A_PMU_UCODE_IMAGE "gpmu_ucode.bin" > + > +/*Choices for DMA to use*/ > +enum { > + GK20A_PMU_DMAIDX_UCODE = 0, > + GK20A_PMU_DMAIDX_VIRT = 1, > + GK20A_PMU_DMAIDX_PHYS_VID = 2, > + GK20A_PMU_DMAIDX_PHYS_SYS_COH = 3, > + GK20A_PMU_DMAIDX_PHYS_SYS_NCOH = 4, > + GK20A_PMU_DMAIDX_RSVD = 5, > + GK20A_PMU_DMAIDX_PELPG = 6, > + GK20A_PMU_DMAIDX_END = 7 > +}; > + > +struct 
pmu_buf_desc { > + struct nvkm_gpuobj *obj; > + struct nvkm_vma vma; > + size_t size; > +}; > + > +struct nvkm_pmu_priv_vm { > + struct nvkm_gpuobj *mem; > + struct nvkm_gpuobj *pgd; > + struct nvkm_vm *vm; > +}; > + > +/*Choices for pmu_state*/ > +enum { > + PMU_STATE_OFF, /*0 PMU is off */ > + PMU_STATE_STARTING, /*1 PMU is on, but not booted */ > + PMU_STATE_INIT_RECEIVED /*2 PMU init message received */ > +}; > + > +struct pmu_mem_gk20a { > + u32 dma_base; > + u8 dma_offset; > + u8 dma_idx; > + u16 fb_size; > +}; > + > +struct pmu_cmdline_args_gk20a { > + u32 cpu_freq_hz; /* Frequency of the clock driving PMU */ > + u32 falc_trace_size; /* falctrace buffer size (bytes) */ > + u32 falc_trace_dma_base; /* 256-byte block address */ > + u32 falc_trace_dma_idx; /* dmaIdx for DMA operations */ > + u8 secure_mode; > + struct pmu_mem_gk20a gc6_ctx; /* dmem offset of gc6 context */ > +}; > + > +/*pmu ucode descriptor*/ > +struct pmu_ucode_desc { > + u32 descriptor_size; > + u32 image_size; > + u32 tools_version; > + u32 app_version; > + char date[GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH]; > + u32 bootloader_start_offset; > + u32 bootloader_size; > + u32 bootloader_imem_offset; > + u32 bootloader_entry_point; > + u32 app_start_offset; > + u32 app_size; > + u32 app_imem_offset; > + u32 app_imem_entry; > + u32 app_dmem_offset; > + u32 app_resident_code_offset; > + u32 app_resident_code_size; > + u32 app_resident_data_offset; > + u32 app_resident_data_size; > + u32 nb_overlays; > + struct {u32 start; u32 size; } load_ovl[GK20A_PMU_UCODE_NB_MAX_OVERLAY]; > + u32 compressed; > +}; > + > +/*pmu msg header*/ > +struct pmu_hdr { > + u8 unit_id; > + u8 size; > + u8 ctrl_flags; > + u8 seq_id; > +}; > + > +#define PMU_MSG_HDR_SIZE sizeof(struct pmu_hdr) > + > +enum { > + PMU_INIT_MSG_TYPE_PMU_INIT = 0, > +}; > + > +/*pmu init msg format*/ > +struct pmu_init_msg_pmu_gk20a { > + u8 msg_type; > + u8 pad; > + u16 os_debug_entry_point; > + > + struct { > + u16 size; > + u16 offset; > + u8 
index; > + u8 pad; > + } queue_info[PMU_QUEUE_COUNT]; > + > + u16 sw_managed_area_offset; > + u16 sw_managed_area_size; > +}; > + > +/*pmu init msg format*/ > +struct pmu_init_msg { > + union { > + u8 msg_type; > + struct pmu_init_msg_pmu_gk20a pmu_init_gk20a; > + }; > +}; > + > +enum { > + PMU_RC_MSG_TYPE_UNHANDLED_CMD = 0, > +}; > + > +struct pmu_rc_msg_unhandled_cmd { > + u8 msg_type; > + u8 unit_id; > +}; > + > +struct pmu_rc_msg { > + u8 msg_type; > + struct pmu_rc_msg_unhandled_cmd unhandled_cmd; > +}; > + > +/*pmu generic msg format*/ > +struct pmu_msg { > + struct pmu_hdr hdr; > + union { > + struct pmu_init_msg init; > + struct pmu_rc_msg rc; > + } msg; > +}; > > struct gk20a_pmu_dvfs_data { > int p_load_target; > @@ -39,8 +211,19 @@ struct gk20a_pmu_priv { > struct nvkm_pmu base; > struct nvkm_alarm alarm; > struct gk20a_pmu_dvfs_data *data; > + struct pmu_ucode_desc *desc; > + struct pmu_buf_desc ucode; > + struct pmu_buf_desc trace_buf; > + struct mutex pmu_copy_lock; > + bool pmu_ready; > + int pmu_state; > + struct nvkm_pmu_priv_vm pmuvm; > + struct mutex isr_mutex; > + bool isr_enabled; > }; > > +#define to_gk20a_priv(ptr) container_of(ptr, struct gk20a_pmu_priv, base) > + > struct gk20a_pmu_dvfs_dev_status { > unsigned long total; > unsigned long busy; > @@ -48,6 +231,59 @@ struct gk20a_pmu_dvfs_dev_status { > }; > > static int > +gk20a_pmu_load_firmware(struct nvkm_pmu *pmu, const struct firmware **pfw) > +{ > + struct nvkm_device *dev; > + char fw[32]; > + > + dev = nv_device(pmu); > + snprintf(fw, sizeof(fw), "nvidia/tegra124/%s", GK20A_PMU_UCODE_IMAGE); > + return request_firmware(pfw, fw, nv_device_base(dev)); > +} > + > +static void > +gk20a_pmu_release_firmware(struct nvkm_pmu *pmu, const struct firmware *pfw) > +{ > + nv_debug(pmu, "firmware released\n"); > + release_firmware(pfw); > +} > + > +static void > +gk20a_pmu_dump_firmware_info(struct nvkm_pmu *pmu, const struct firmware *fw) > +{ > + struct pmu_ucode_desc *desc = (struct 
pmu_ucode_desc *)fw->data; > + > + nv_debug(pmu, "GK20A PMU firmware information\n"); > + nv_debug(pmu, "descriptor size = %u\n", desc->descriptor_size); > + nv_debug(pmu, "image size = %u\n", desc->image_size); > + nv_debug(pmu, "app_version = 0x%08x\n", desc->app_version); > + nv_debug(pmu, "date = %s\n", desc->date); > + nv_debug(pmu, "bootloader_start_offset = 0x%08x\n", > + desc->bootloader_start_offset); > + nv_debug(pmu, "bootloader_size = 0x%08x\n", desc->bootloader_size); > + nv_debug(pmu, "bootloader_imem_offset = 0x%08x\n", > + desc->bootloader_imem_offset); > + nv_debug(pmu, "bootloader_entry_point = 0x%08x\n", > + desc->bootloader_entry_point); > + nv_debug(pmu, "app_start_offset = 0x%08x\n", desc->app_start_offset); > + nv_debug(pmu, "app_size = 0x%08x\n", desc->app_size); > + nv_debug(pmu, "app_imem_offset = 0x%08x\n", desc->app_imem_offset); > + nv_debug(pmu, "app_imem_entry = 0x%08x\n", desc->app_imem_entry); > + nv_debug(pmu, "app_dmem_offset = 0x%08x\n", desc->app_dmem_offset); > + nv_debug(pmu, "app_resident_code_offset = 0x%08x\n", > + desc->app_resident_code_offset); > + nv_debug(pmu, "app_resident_code_size = 0x%08x\n", > + desc->app_resident_code_size); > + nv_debug(pmu, "app_resident_data_offset = 0x%08x\n", > + desc->app_resident_data_offset); > + nv_debug(pmu, "app_resident_data_size = 0x%08x\n", > + desc->app_resident_data_size); > + nv_debug(pmu, "nb_overlays = %d\n", desc->nb_overlays); > + > + nv_debug(pmu, "compressed = %u\n", desc->compressed); > +} > + > +static int > gk20a_pmu_dvfs_target(struct gk20a_pmu_priv *priv, int *state) > { > struct nvkm_clk *clk = nvkm_clk(priv); > @@ -160,40 +396,529 @@ resched: > } > > static int > -gk20a_pmu_fini(struct nvkm_object *object, bool suspend) > +gk20a_pmu_enable_hw(struct gk20a_pmu_priv *priv, struct nvkm_mc *pmc, bool enable) > { > - struct nvkm_pmu *pmu = (void *)object; > - struct gk20a_pmu_priv *priv = (void *)pmu; > + if (enable) { > + nv_mask(pmc, 0x000200, 0x00002000, 0x00002000); > 
+ nv_rd32(pmc, 0x00000200); > + if (nv_wait(priv, 0x0010a10c, 0x00000006, 0x00000000)) > + return 0; > + nv_mask(pmc, 0x00000200, 0x2000, 0x00000000); > + nv_error(priv, "Falcon mem scrubbing timeout\n"); > + return -ETIMEDOUT; > + } else { > + nv_mask(pmc, 0x00000200, 0x2000, 0x00000000); > + return 0; > + } > +} > +static void > +gk20a_pmu_enable_irq(struct gk20a_pmu_priv *priv, struct nvkm_mc *pmc, bool enable) > +{ > + if (enable) { > + nv_debug(priv, "enable pmu irq\n"); > + nv_wr32(priv, 0x0010a010, 0xff); > + nv_mask(pmc, 0x00000640, 0x1000000, 0x1000000); > + nv_mask(pmc, 0x00000644, 0x1000000, 0x1000000); > + } else { > + nv_debug(priv, "disable pmu irq\n"); > + nv_mask(pmc, 0x00000640, 0x1000000, 0x00000000); > + nv_mask(pmc, 0x00000644, 0x1000000, 0x00000000); > + nv_wr32(priv, 0x0010a014, 0xff); > + } > > - nvkm_timer_alarm_cancel(priv, &priv->alarm); > +} > > - return nvkm_subdev_fini(&pmu->base, suspend); > +static int > +gk20a_pmu_idle(struct gk20a_pmu_priv *priv) > +{ > + if (!nv_wait(priv, 0x0010a04c, 0x0000ffff, 0x00000000)) { > + nv_error(priv, "timeout waiting pmu idle\n"); > + return -EBUSY; > + } > + > + return 0; > +} > + > +static int > +gk20a_pmu_enable(struct gk20a_pmu_priv *priv, struct nvkm_mc *pmc, bool enable) > +{ > + u32 pmc_enable; > + int err; > + > + if (enable) { > + err = gk20a_pmu_enable_hw(priv, pmc, true); > + if (err) > + return err; > + > + err = gk20a_pmu_idle(priv); > + if (err) > + return err; > + > + gk20a_pmu_enable_irq(priv, pmc, true); > + } else { > + pmc_enable = nv_rd32(pmc, 0x200); > + if ((pmc_enable & 0x2000) != 0x0) { > + gk20a_pmu_enable_irq(priv, pmc, false); > + gk20a_pmu_enable_hw(priv, pmc, false); > + } > + } > + > + return 0; > +} > + > +static void > +gk20a_pmu_copy_to_dmem(struct gk20a_pmu_priv *priv, u32 dst, u8 *src, u32 size, > + u8 port) > +{ > + u32 i, words, bytes; > + u32 data, addr_mask; > + u32 *src_u32 = (u32 *)src; > + > + if (size == 0) { > + nv_error(priv, "size is zero\n"); > + goto out; 
> + } > + > + if (dst & 0x3) { > + nv_error(priv, "dst (0x%08x) not 4-byte aligned\n", dst); > + goto out; > + } > + > + mutex_lock(&priv->pmu_copy_lock); > + words = size >> 2; > + bytes = size & 0x3; > + addr_mask = 0xfffc; > + dst &= addr_mask; > + > + nv_wr32(priv, (0x10a1c0 + (port * 8)), (dst | (0x1 << 24))); > + > + for (i = 0; i < words; i++) { > + nv_wr32(priv, (0x10a1c4 + (port * 8)), src_u32[i]); > + nv_debug(priv, "0x%08x\n", src_u32[i]); > + } > + > + if (bytes > 0) { > + data = 0; > + for (i = 0; i < bytes; i++) > + ((u8 *)&data)[i] = src[(words << 2) + i]; > + nv_wr32(priv, (0x10a1c4 + (port * 8)), data); > + nv_debug(priv, "0x%08x\n", data); > + } > + > + data = nv_rd32(priv, (0x10a1c0 + (port * 8))) & addr_mask; > + size = ALIGN(size, 4); > + if (data != dst + size) { > + nv_error(priv, "copy failed.... bytes written %d, expected %d", > + data - dst, size); > + } > + mutex_unlock(&priv->pmu_copy_lock); > +out: > + nv_debug(priv, "exit %s\n", __func__); > +} > + > +static void > +gk20a_copy_from_dmem(struct gk20a_pmu_priv *priv, u32 src, u8 *dst, u32 size, > + u8 port) > +{ > + u32 i, words, bytes; > + u32 data, addr_mask; > + u32 *dst_u32 = (u32 *)dst; > + > + if (size == 0) { > + nv_error(priv, "size is zero\n"); > + goto out; > + } > + > + if (src & 0x3) { > + nv_error(priv, "src (0x%08x) not 4-byte aligned\n", src); > + goto out; > + } > + > + mutex_lock(&priv->pmu_copy_lock); > + > + words = size >> 2; > + bytes = size & 0x3; > + > + addr_mask = 0xfffc; > + > + src &= addr_mask; > + > + nv_wr32(priv, (0x10a1c0 + (port * 8)), (src | (0x1 << 25))); > + > + for (i = 0; i < words; i++) { > + dst_u32[i] = nv_rd32(priv, (0x0010a1c4 + port * 8)); > + nv_debug(priv, "0x%08x\n", dst_u32[i]); > + } > + if (bytes > 0) { > + data = nv_rd32(priv, (0x0010a1c4 + port * 8)); > + nv_debug(priv, "0x%08x\n", data); > + > + for (i = 0; i < bytes; i++) > + dst[(words << 2) + i] = ((u8 *)&data)[i]; > + } > + mutex_unlock(&priv->pmu_copy_lock); > +out: > + 
nv_debug(priv, "exit %s\n", __func__); > +} > + > +static int > +gk20a_pmu_process_init_msg(struct gk20a_pmu_priv *priv, struct pmu_msg *msg) > +{ > + struct pmu_init_msg_pmu_gk20a *init; > + u32 tail; > + > + tail = nv_rd32(priv, 0x0010a4cc); > + > + gk20a_copy_from_dmem(priv, tail, (u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0); > + > + if (msg->hdr.unit_id != PMU_UNIT_INIT) { > + nv_error(priv, "expecting init msg\n"); > + return -EINVAL; > + } > + > + gk20a_copy_from_dmem(priv, tail + PMU_MSG_HDR_SIZE, > + (u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0); > + > + if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) { > + nv_error(priv, "expecting init msg\n"); > + return -EINVAL; > + } > + > + tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT); > + nv_wr32(priv, 0x0010a4cc, tail); > + init = &msg->msg.init.pmu_init_gk20a; > + priv->pmu_ready = true; > + priv->pmu_state = PMU_STATE_INIT_RECEIVED; > + nv_debug(priv, "init msg processed\n"); > + return 0; > +} > + > +static void > +gk20a_pmu_process_message(struct work_struct *work) > +{ > + struct nvkm_pmu *pmu = container_of(work, struct nvkm_pmu, recv.work); > + struct gk20a_pmu_priv *priv = to_gk20a_priv(pmu); > + struct pmu_msg msg; > + struct nvkm_mc *pmc = nvkm_mc(pmu); > + > + mutex_lock(&priv->isr_mutex); > + if (unlikely(!priv->pmu_ready)) { > + nv_debug(pmu, "processing init msg\n"); > + gk20a_pmu_process_init_msg(priv, &msg); > + mutex_unlock(&priv->isr_mutex); > + gk20a_pmu_enable_irq(priv, pmc, true); > + } else { > + mutex_unlock(&priv->isr_mutex); > + } > +} > + > +static int > +gk20a_pmu_init_vm(struct gk20a_pmu_priv *priv, const struct firmware *fw) > +{ > + int ret = 0; > + u32 *ucode_image; > + struct pmu_ucode_desc *desc = (struct pmu_ucode_desc *)fw->data; > + int i; > + struct nvkm_pmu_priv_vm *pmuvm = &priv->pmuvm; > + struct nvkm_device *device = nv_device(&priv->base); > + struct nvkm_vm *vm; > + const u64 pmu_area_len = 300*1024; > + > + /* mem for inst blk*/ > + ret = 
nvkm_gpuobj_new(nv_object(priv), NULL, 0x1000, 0, 0, &pmuvm->mem); > + if (ret) > + return ret; > + > + /* mem for pgd*/ > + ret = nvkm_gpuobj_new(nv_object(priv), NULL, 0x8000, 0, 0, &pmuvm->pgd); > + if (ret) > + return ret; > + > + /*allocate virtual memory range*/ > + ret = nvkm_vm_new(device, 0, pmu_area_len, 0, &vm); > + if (ret) > + return ret; > + > + atomic_inc(&vm->engref[NVDEV_SUBDEV_PMU]); > + > + /* update VM with pgd */ > + ret = nvkm_vm_ref(vm, &pmuvm->vm, pmuvm->pgd); > + if (ret) > + return ret; > + > + /*update pgd in inst blk */ > + nv_wo32(pmuvm->mem, 0x0200, lower_32_bits(pmuvm->pgd->addr)); > + nv_wo32(pmuvm->mem, 0x0204, upper_32_bits(pmuvm->pgd->addr)); > + nv_wo32(pmuvm->mem, 0x0208, lower_32_bits(pmu_area_len - 1)); > + nv_wo32(pmuvm->mem, 0x020c, upper_32_bits(pmu_area_len - 1)); > + > + /* allocate memory for pmu fw to be copied to*/ > + ret = nvkm_gpuobj_new(nv_object(priv), NULL, GK20A_PMU_UCODE_SIZE_MAX, > + 0x1000, 0, &priv->ucode.obj); > + if (ret) > + return ret; > + > + ucode_image = (u32 *)((u8 *)desc + desc->descriptor_size); > + for (i = 0; i < (desc->app_start_offset + desc->app_size); i += 4) > + nv_wo32(priv->ucode.obj, i, ucode_image[i/4]); > + > + /* map allocated memory into GMMU */ > + ret = nvkm_gpuobj_map_vm(priv->ucode.obj, vm, NV_MEM_ACCESS_RW, > + &priv->ucode.vma); > + if (ret) > + return ret; > + > + return ret; > +} > + > +static int > +gk20a_init_pmu_setup_sw(struct gk20a_pmu_priv *priv) > +{ > + struct nvkm_pmu_priv_vm *pmuvm = &priv->pmuvm; > + int ret = 0; > + > + INIT_WORK(&priv->base.recv.work, gk20a_pmu_process_message); > + > + ret = nvkm_gpuobj_new(nv_object(priv), NULL, GK20A_PMU_TRACE_BUFSIZE, > + 0, 0, &priv->trace_buf.obj); > + if (ret) > + return ret; > + > + ret = nvkm_gpuobj_map_vm(nv_gpuobj(priv->trace_buf.obj), pmuvm->vm, > + NV_MEM_ACCESS_RW, &priv->trace_buf.vma); > + if (ret) > + return ret; > + > + return 0; > +} > + > +static int > +gk20a_pmu_bootstrap(struct gk20a_pmu_priv *priv) > +{ > + 
struct pmu_ucode_desc *desc = priv->desc; > + u32 addr_code, addr_data, addr_load; > + u32 i, blocks, addr_args; > + struct pmu_cmdline_args_gk20a cmdline_args; > + struct nvkm_pmu_priv_vm *pmuvm = &priv->pmuvm; > + > + nv_mask(priv, 0x0010a048, 0x01, 0x01); > + /*bind the address*/ > + nv_wr32(priv, 0x0010a480, > + pmuvm->mem->addr >> 12 | > + 0x1 << 30 | > + 0x20000000); > + > + /* TBD: load all other surfaces */ > + cmdline_args.falc_trace_size = GK20A_PMU_TRACE_BUFSIZE; > + cmdline_args.falc_trace_dma_base = > + lower_32_bits(priv->trace_buf.vma.offset >> 8); > + cmdline_args.falc_trace_dma_idx = GK20A_PMU_DMAIDX_VIRT; > + cmdline_args.cpu_freq_hz = 204; > + cmdline_args.secure_mode = 0; > + > + addr_args = (nv_rd32(priv, 0x0010a108) >> 9) & 0x1ff; > + addr_args = addr_args << GK20A_PMU_DMEM_BLKSIZE2; > + addr_args -= sizeof(struct pmu_cmdline_args_gk20a); > + nv_debug(priv, "initiating copy to dmem\n"); > + gk20a_pmu_copy_to_dmem(priv, addr_args, > + (u8 *)&cmdline_args, > + sizeof(struct pmu_cmdline_args_gk20a), 0); > + > + nv_wr32(priv, 0x0010a1c0, 0x1 << 24); > + > + addr_code = lower_32_bits((priv->ucode.vma.offset + > + desc->app_start_offset + > + desc->app_resident_code_offset) >> 8); > + > + addr_data = lower_32_bits((priv->ucode.vma.offset + > + desc->app_start_offset + > + desc->app_resident_data_offset) >> 8); > + > + addr_load = lower_32_bits((priv->ucode.vma.offset + > + desc->bootloader_start_offset) >> 8); > + > + nv_wr32(priv, 0x0010a1c4, GK20A_PMU_DMAIDX_UCODE); > + nv_debug(priv, "0x%08x\n", GK20A_PMU_DMAIDX_UCODE); > + nv_wr32(priv, 0x0010a1c4, (addr_code)); > + nv_debug(priv, "0x%08x\n", (addr_code)); > + nv_wr32(priv, 0x0010a1c4, desc->app_size); > + nv_debug(priv, "0x%08x\n", desc->app_size); > + nv_wr32(priv, 0x0010a1c4, desc->app_resident_code_size); > + nv_debug(priv, "0x%08x\n", desc->app_resident_code_size); > + nv_wr32(priv, 0x0010a1c4, desc->app_imem_entry); > + nv_debug(priv, "0x%08x\n", desc->app_imem_entry); > + nv_wr32(priv, 
0x0010a1c4, (addr_data)); > + nv_debug(priv, "0x%08x\n", (addr_data)); > + nv_wr32(priv, 0x0010a1c4, desc->app_resident_data_size); > + nv_debug(priv, "0x%08x\n", desc->app_resident_data_size); > + nv_wr32(priv, 0x0010a1c4, (addr_code)); > + nv_debug(priv, "0x%08x\n", (addr_code)); > + nv_wr32(priv, 0x0010a1c4, 0x1); > + nv_debug(priv, "0x%08x\n", 1); > + nv_wr32(priv, 0x0010a1c4, addr_args); > + nv_debug(priv, "0x%08x\n", addr_args); > + > + nv_wr32(priv, 0x0010a110, > + (addr_load) - (desc->bootloader_imem_offset >> 8)); > + > + blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8; > + > + for (i = 0; i < blocks; i++) { > + nv_wr32(priv, 0x0010a114, > + desc->bootloader_imem_offset + (i << 8)); > + nv_wr32(priv, 0x0010a11c, > + desc->bootloader_imem_offset + (i << 8)); > + nv_wr32(priv, 0x0010a118, > + 0x01 << 4 | > + 0x06 << 8 | > + ((GK20A_PMU_DMAIDX_UCODE & 0x07) << 12)); > + } > + > + nv_wr32(priv, 0x0010a104, (desc->bootloader_entry_point)); > + nv_wr32(priv, 0x0010a100, 0x1 << 1); > + nv_wr32(priv, 0x0010a080, desc->app_version); > + > + return 0; > +} > + > +static int > +gk20a_init_pmu_setup_hw1(struct gk20a_pmu_priv *priv, struct nvkm_mc *pmc) > +{ > + int err; > + > + mutex_lock(&priv->isr_mutex); > + err = gk20a_pmu_enable(priv, pmc, true); > + priv->isr_enabled = (err == 0); > + mutex_unlock(&priv->isr_mutex); > + if (err) > + return err; > + > + /* setup apertures - virtual */ > + nv_wr32(priv, 0x10a600 + 0 * 4, 0x0); > + nv_wr32(priv, 0x10a600 + 1 * 4, 0x0); > + /* setup apertures - physical */ > + nv_wr32(priv, 0x10a600 + 2 * 4, 0x4 | 0x0); > + nv_wr32(priv, 0x10a600 + 3 * 4, 0x4 | 0x1); > + nv_wr32(priv, 0x10a600 + 4 * 4, 0x4 | 0x2); > + > + /* TBD: load pmu ucode */ > + err = gk20a_pmu_bootstrap(priv); > + if (err) > + return err; > + > + return 0; > +} > + > + > +static void > +gk20a_pmu_intr(struct nvkm_subdev *subdev) > +{ > + struct gk20a_pmu_priv *priv = to_gk20a_priv(nvkm_pmu(subdev)); > + struct nvkm_mc *pmc = nvkm_mc(priv); > + u32 
intr, mask; > + > + if (!priv->isr_enabled) > + return; > + > + mask = nv_rd32(priv, 0x0010a018) & nv_rd32(priv, 0x0010a01c); > + > + intr = nv_rd32(priv, 0x0010a008) & mask; > + > + nv_debug(priv, "received falcon interrupt: 0x%08x\n", intr); > + gk20a_pmu_enable_irq(priv, pmc, false); > + > + if (!intr || priv->pmu_state == PMU_STATE_OFF) { > + nv_wr32(priv, 0x0010a004, intr); > + nv_error(priv, "pmu state off\n"); > + gk20a_pmu_enable_irq(priv, pmc, true); > + } > + > + if (intr & 0x10) > + nv_error(priv, "pmu halt intr not implemented\n"); > + > + if (intr & 0x20) { > + nv_error(priv, "exterr interrupt not impl..Clear interrupt\n"); > + nv_mask(priv, 0x0010a16c, (0x1 << 31), 0x00000000); > + } > + > + if (intr & 0x40) { > + nv_debug(priv, "scheduling work\n"); > + schedule_work(&priv->base.recv.work); > + } > + > + nv_wr32(priv, 0x0010a004, intr); > + nv_debug(priv, "irq handled\n"); > +} > + > +static void > +gk20a_pmu_pgob(struct nvkm_pmu *pmu, bool enable) > +{ > } > > static int > gk20a_pmu_init(struct nvkm_object *object) > { > - struct nvkm_pmu *pmu = (void *)object; > - struct gk20a_pmu_priv *priv = (void *)pmu; > + struct gk20a_pmu_priv *priv = (void *)object; > + struct nvkm_mc *pmc = nvkm_mc(object); > int ret; > > - ret = nvkm_subdev_init(&pmu->base); > + ret = nvkm_subdev_init(&priv->base.base); > if (ret) > return ret; > > - pmu->pgob = nvkm_pmu_pgob; > + priv->pmu_state = PMU_STATE_STARTING; > + ret = gk20a_init_pmu_setup_hw1(priv, pmc); > + if (ret) > + return ret; > + > + nv_wr32(priv, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001); > + nv_wr32(priv, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002); > + nv_wr32(priv, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003); > > - /* init pwr perf counter */ > - nv_wr32(pmu, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001); > - nv_wr32(pmu, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002); > - nv_wr32(pmu, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003); > + nvkm_timer_alarm(priv, 2000000000, &priv->alarm); > > - nvkm_timer_alarm(pmu, 
2000000000, &priv->alarm); > return ret; > } > > +static int > +gk20a_pmu_fini(struct nvkm_object *object, bool suspend) > +{ > + struct gk20a_pmu_priv *priv = (void *)object; > + struct nvkm_mc *pmc = nvkm_mc(object); > + > + nvkm_timer_alarm_cancel(priv, &priv->alarm); > + > + cancel_work_sync(&priv->base.recv.work); > + > + mutex_lock(&priv->isr_mutex); > + gk20a_pmu_enable(priv, pmc, false); > + priv->isr_enabled = false; > + mutex_unlock(&priv->isr_mutex); > + > + priv->pmu_state = PMU_STATE_OFF; > + priv->pmu_ready = false; > + nv_wr32(priv, 0x10a014, 0x00000060); > + > + return nvkm_subdev_fini(&priv->base.base, suspend); > +} > + > +static void > +gk20a_pmu_dtor(struct nvkm_object *object) > +{ > + struct gk20a_pmu_priv *priv = (void *)object; > + > + nvkm_gpuobj_unmap(&priv->trace_buf.vma); > + nvkm_gpuobj_ref(NULL, &priv->trace_buf.obj); > + > + nvkm_gpuobj_unmap(&priv->ucode.vma); > + nvkm_gpuobj_ref(NULL, &priv->ucode.obj); > + nvkm_vm_ref(NULL, &priv->pmuvm.vm, priv->pmuvm.pgd); > + nvkm_gpuobj_ref(NULL, &priv->pmuvm.pgd); > + nvkm_gpuobj_ref(NULL, &priv->pmuvm.mem); > +} > + > static struct gk20a_pmu_dvfs_data > -gk20a_dvfs_data= { > +gk20a_dvfs_data = { > .p_load_target = 70, > .p_load_max = 90, > .p_smooth = 1, > @@ -205,6 +930,9 @@ gk20a_pmu_ctor(struct nvkm_object *parent, struct nvkm_object *engine, > struct nvkm_object **pobject) > { > struct gk20a_pmu_priv *priv; > + struct nvkm_pmu *pmu; > + struct nvkm_mc *pmc; > + const struct firmware *pmufw = NULL; > int ret; > > ret = nvkm_pmu_create(parent, engine, oclass, &priv); > @@ -212,10 +940,47 @@ gk20a_pmu_ctor(struct nvkm_object *parent, struct nvkm_object *engine, > if (ret) > return ret; > > + mutex_init(&priv->isr_mutex); > + mutex_init(&priv->pmu_copy_lock); > priv->data = &gk20a_dvfs_data; > + pmu = &priv->base; > + pmc = nvkm_mc(pmu); > + nv_subdev(pmu)->intr = gk20a_pmu_intr; > > + ret = gk20a_pmu_load_firmware(pmu, &pmufw); > + if (ret < 0) { > + nv_error(priv, "failed to load pmu 
firmware\n"); > + return ret; > + } > + > + ret = gk20a_pmu_init_vm(priv, pmufw); > + if (ret < 0) { > + nv_error(priv, "failed to map pmu fw to va space\n"); > + goto err; > + } > + > + priv->desc = (struct pmu_ucode_desc *)pmufw->data; > + gk20a_pmu_dump_firmware_info(pmu, pmufw); > + > + if (priv->desc->app_version != APP_VERSION_GK20A) { > + nv_error(priv, "PMU version unsupported: %d\n", > + priv->desc->app_version); > + ret = -EINVAL; > + goto err; > + } > + > + ret = gk20a_init_pmu_setup_sw(priv); > + if (ret) > + goto err; > + > + pmu->pgob = nvkm_pmu_pgob; > nvkm_alarm_init(&priv->alarm, gk20a_pmu_dvfs_work); > + > + return 0; > + > +err: > + gk20a_pmu_release_firmware(pmu, pmufw); > + return ret; > } > > struct nvkm_oclass * > @@ -223,8 +988,10 @@ gk20a_pmu_oclass = &(struct nvkm_pmu_impl) { > .base.handle = NV_SUBDEV(PMU, 0xea), > .base.ofuncs = &(struct nvkm_ofuncs) { > .ctor = gk20a_pmu_ctor, > - .dtor = _nvkm_pmu_dtor, > + .dtor = gk20a_pmu_dtor, > .init = gk20a_pmu_init, > .fini = gk20a_pmu_fini, > }, > + .pgob = gk20a_pmu_pgob, > }.base; > + > -- To unsubscribe from this list: send the line "unsubscribe linux-tegra" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 13 April 2015 at 20:42, Alexandre Courbot <acourbot@nvidia.com> wrote: > Ben, I guess our main remaining concern with this patch is how it should > integrate wrt. the existing PMU code. Since it is designed to interact with > the NVIDIA firmware, maybe we should use a different base code, or do you > think we can somehow share code and data structures? Hey Alexandre, Sorry for the delay in responding to this. My original thinking with transitioning to use NVIDIA's firmware was that I'd modify our firmware interfaces to match yours, and share the code. I haven't started on any of this yet due to not having any word on how you guys will be shipping the images, etc. It would be nice to have some communication on these things :) I'm suspecting you won't be wanting to modify our falcon assembly, so I guess I'll set aside some time to use this patch as a base and transition our ucode to boot using it? Then you guys can build more stuff on top of that. I'm also happy to let you modify our ucode if you wish :) Thanks, Ben. > > Deepak, as explained in the review, I think your time is better spent making > progress on (and starting submission of) the secure boot code. Hopefully > this patch starts to be in good shape, so I will take care of it from now > on. Thanks! > > > On 04/13/2015 07:38 PM, Alexandre Courbot wrote: >> >> From: Deepak Goyal <dgoyal@nvidia.com> >> >> - Maps PMU firmware into PMU virtual memory. >> - Copy bootloader into PMU memory and start it. >> - Allow the PMU to interact with HOST via interrupts. >> >> PMU after successful configurations (to follow after this patch) will: >> 1.Autonomously power gate graphics engine when not in use.It will save >> us a lot of power. >> 2.Provide better way to scale frequencies by reporting Perf counters. >> 3.Be critical for GPU functionality as future GPUs secure some register >> & mem accesses involved in context switch. 
>> >> Signed-off-by: Deepak Goyal <dgoyal@nvidia.com> >> Signed-off-by: Alexandre Courbot <acourbot@nvidia.com> >> --- >> Changes since v3: >> - Cleaned formatting, renamed variables according to Nouveau standards >> - (Hopefully) fixed init/fini sequence >> - Removed a few more unneeded variables >> >> drm/nouveau/nvkm/subdev/pmu/gk20a.c | 803 >> +++++++++++++++++++++++++++++++++++- >> 1 file changed, 785 insertions(+), 18 deletions(-) >> >> diff --git a/drm/nouveau/nvkm/subdev/pmu/gk20a.c >> b/drm/nouveau/nvkm/subdev/pmu/gk20a.c >> index 594f746e68f2..c206ec5e558a 100644 >> --- a/drm/nouveau/nvkm/subdev/pmu/gk20a.c >> +++ b/drm/nouveau/nvkm/subdev/pmu/gk20a.c >> @@ -1,5 +1,5 @@ >> /* >> - * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. >> + * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. >> * >> * Permission is hereby granted, free of charge, to any person obtaining >> a >> * copy of this software and associated documentation files (the >> "Software"), >> @@ -19,14 +19,186 @@ >> * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER >> * DEALINGS IN THE SOFTWARE. 
>> */ >> -#include "priv.h" >> >> +#include "priv.h" >> +#include <core/client.h> >> +#include <core/gpuobj.h> >> +#include <subdev/bar.h> >> +#include <subdev/fb.h> >> +#include <subdev/mc.h> >> +#include <subdev/timer.h> >> +#include <subdev/mmu.h> >> +#include <subdev/pmu.h> >> +#include <core/object.h> >> +#include <core/device.h> >> +#include <linux/delay.h> >> +#include <linux/firmware.h> >> #include <subdev/clk.h> >> #include <subdev/timer.h> >> #include <subdev/volt.h> >> >> +#define APP_VERSION_GK20A 17997577 >> +#define GK20A_PMU_UCODE_SIZE_MAX (256 * 1024) >> +#define PMU_QUEUE_COUNT 5 >> + >> +#define GK20A_PMU_TRACE_BUFSIZE 0x4000 /* 4K */ >> +#define GK20A_PMU_DMEM_BLKSIZE2 8 >> +#define GK20A_PMU_UCODE_NB_MAX_OVERLAY 32 >> +#define GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH 64 >> + >> +#define PMU_UNIT_REWIND (0x00) >> +#define PMU_UNIT_PG (0x03) >> +#define PMU_UNIT_INIT (0x07) >> +#define PMU_UNIT_PERFMON (0x12) >> +#define PMU_UNIT_THERM (0x1B) >> +#define PMU_UNIT_RC (0x1F) >> +#define PMU_UNIT_NULL (0x20) >> +#define PMU_UNIT_END (0x23) >> +#define PMU_UNIT_TEST_START (0xFE) >> +#define PMU_UNIT_END_SIM (0xFF) >> +#define PMU_UNIT_TEST_END (0xFF) >> + >> +#define PMU_UNIT_ID_IS_VALID(id) \ >> + (((id) < PMU_UNIT_END) || ((id) >= PMU_UNIT_TEST_START)) >> +#define PMU_DMEM_ALIGNMENT (4) >> + >> #define BUSY_SLOT 0 >> #define CLK_SLOT 7 >> +#define GK20A_PMU_UCODE_IMAGE "gpmu_ucode.bin" >> + >> +/*Choices for DMA to use*/ >> +enum { >> + GK20A_PMU_DMAIDX_UCODE = 0, >> + GK20A_PMU_DMAIDX_VIRT = 1, >> + GK20A_PMU_DMAIDX_PHYS_VID = 2, >> + GK20A_PMU_DMAIDX_PHYS_SYS_COH = 3, >> + GK20A_PMU_DMAIDX_PHYS_SYS_NCOH = 4, >> + GK20A_PMU_DMAIDX_RSVD = 5, >> + GK20A_PMU_DMAIDX_PELPG = 6, >> + GK20A_PMU_DMAIDX_END = 7 >> +}; >> + >> +struct pmu_buf_desc { >> + struct nvkm_gpuobj *obj; >> + struct nvkm_vma vma; >> + size_t size; >> +}; >> + >> +struct nvkm_pmu_priv_vm { >> + struct nvkm_gpuobj *mem; >> + struct nvkm_gpuobj *pgd; >> + struct nvkm_vm *vm; >> +}; >> + >> 
+/*Choices for pmu_state*/ >> +enum { >> + PMU_STATE_OFF, /*0 PMU is off */ >> + PMU_STATE_STARTING, /*1 PMU is on, but not booted */ >> + PMU_STATE_INIT_RECEIVED /*2 PMU init message received */ >> +}; >> + >> +struct pmu_mem_gk20a { >> + u32 dma_base; >> + u8 dma_offset; >> + u8 dma_idx; >> + u16 fb_size; >> +}; >> + >> +struct pmu_cmdline_args_gk20a { >> + u32 cpu_freq_hz; /* Frequency of the clock driving >> PMU */ >> + u32 falc_trace_size; /* falctrace buffer size (bytes) >> */ >> + u32 falc_trace_dma_base; /* 256-byte block address */ >> + u32 falc_trace_dma_idx; /* dmaIdx for DMA operations */ >> + u8 secure_mode; >> + struct pmu_mem_gk20a gc6_ctx; /* dmem offset of gc6 >> context */ >> +}; >> + >> +/*pmu ucode descriptor*/ >> +struct pmu_ucode_desc { >> + u32 descriptor_size; >> + u32 image_size; >> + u32 tools_version; >> + u32 app_version; >> + char date[GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH]; >> + u32 bootloader_start_offset; >> + u32 bootloader_size; >> + u32 bootloader_imem_offset; >> + u32 bootloader_entry_point; >> + u32 app_start_offset; >> + u32 app_size; >> + u32 app_imem_offset; >> + u32 app_imem_entry; >> + u32 app_dmem_offset; >> + u32 app_resident_code_offset; >> + u32 app_resident_code_size; >> + u32 app_resident_data_offset; >> + u32 app_resident_data_size; >> + u32 nb_overlays; >> + struct {u32 start; u32 size; } >> load_ovl[GK20A_PMU_UCODE_NB_MAX_OVERLAY]; >> + u32 compressed; >> +}; >> + >> +/*pmu msg header*/ >> +struct pmu_hdr { >> + u8 unit_id; >> + u8 size; >> + u8 ctrl_flags; >> + u8 seq_id; >> +}; >> + >> +#define PMU_MSG_HDR_SIZE sizeof(struct pmu_hdr) >> + >> +enum { >> + PMU_INIT_MSG_TYPE_PMU_INIT = 0, >> +}; >> + >> +/*pmu init msg format*/ >> +struct pmu_init_msg_pmu_gk20a { >> + u8 msg_type; >> + u8 pad; >> + u16 os_debug_entry_point; >> + >> + struct { >> + u16 size; >> + u16 offset; >> + u8 index; >> + u8 pad; >> + } queue_info[PMU_QUEUE_COUNT]; >> + >> + u16 sw_managed_area_offset; >> + u16 sw_managed_area_size; >> +}; >> + >> 
+/*pmu init msg format*/ >> +struct pmu_init_msg { >> + union { >> + u8 msg_type; >> + struct pmu_init_msg_pmu_gk20a pmu_init_gk20a; >> + }; >> +}; >> + >> +enum { >> + PMU_RC_MSG_TYPE_UNHANDLED_CMD = 0, >> +}; >> + >> +struct pmu_rc_msg_unhandled_cmd { >> + u8 msg_type; >> + u8 unit_id; >> +}; >> + >> +struct pmu_rc_msg { >> + u8 msg_type; >> + struct pmu_rc_msg_unhandled_cmd unhandled_cmd; >> +}; >> + >> +/*pmu generic msg format*/ >> +struct pmu_msg { >> + struct pmu_hdr hdr; >> + union { >> + struct pmu_init_msg init; >> + struct pmu_rc_msg rc; >> + } msg; >> +}; >> >> struct gk20a_pmu_dvfs_data { >> int p_load_target; >> @@ -39,8 +211,19 @@ struct gk20a_pmu_priv { >> struct nvkm_pmu base; >> struct nvkm_alarm alarm; >> struct gk20a_pmu_dvfs_data *data; >> + struct pmu_ucode_desc *desc; >> + struct pmu_buf_desc ucode; >> + struct pmu_buf_desc trace_buf; >> + struct mutex pmu_copy_lock; >> + bool pmu_ready; >> + int pmu_state; >> + struct nvkm_pmu_priv_vm pmuvm; >> + struct mutex isr_mutex; >> + bool isr_enabled; >> }; >> >> +#define to_gk20a_priv(ptr) container_of(ptr, struct gk20a_pmu_priv, base) >> + >> struct gk20a_pmu_dvfs_dev_status { >> unsigned long total; >> unsigned long busy; >> @@ -48,6 +231,59 @@ struct gk20a_pmu_dvfs_dev_status { >> }; >> >> static int >> +gk20a_pmu_load_firmware(struct nvkm_pmu *pmu, const struct firmware >> **pfw) >> +{ >> + struct nvkm_device *dev; >> + char fw[32]; >> + >> + dev = nv_device(pmu); >> + snprintf(fw, sizeof(fw), "nvidia/tegra124/%s", >> GK20A_PMU_UCODE_IMAGE); >> + return request_firmware(pfw, fw, nv_device_base(dev)); >> +} >> + >> +static void >> +gk20a_pmu_release_firmware(struct nvkm_pmu *pmu, const struct firmware >> *pfw) >> +{ >> + nv_debug(pmu, "firmware released\n"); >> + release_firmware(pfw); >> +} >> + >> +static void >> +gk20a_pmu_dump_firmware_info(struct nvkm_pmu *pmu, const struct firmware >> *fw) >> +{ >> + struct pmu_ucode_desc *desc = (struct pmu_ucode_desc *)fw->data; >> + >> + nv_debug(pmu, 
"GK20A PMU firmware information\n"); >> + nv_debug(pmu, "descriptor size = %u\n", desc->descriptor_size); >> + nv_debug(pmu, "image size = %u\n", desc->image_size); >> + nv_debug(pmu, "app_version = 0x%08x\n", desc->app_version); >> + nv_debug(pmu, "date = %s\n", desc->date); >> + nv_debug(pmu, "bootloader_start_offset = 0x%08x\n", >> + desc->bootloader_start_offset); >> + nv_debug(pmu, "bootloader_size = 0x%08x\n", >> desc->bootloader_size); >> + nv_debug(pmu, "bootloader_imem_offset = 0x%08x\n", >> + desc->bootloader_imem_offset); >> + nv_debug(pmu, "bootloader_entry_point = 0x%08x\n", >> + desc->bootloader_entry_point); >> + nv_debug(pmu, "app_start_offset = 0x%08x\n", >> desc->app_start_offset); >> + nv_debug(pmu, "app_size = 0x%08x\n", desc->app_size); >> + nv_debug(pmu, "app_imem_offset = 0x%08x\n", >> desc->app_imem_offset); >> + nv_debug(pmu, "app_imem_entry = 0x%08x\n", desc->app_imem_entry); >> + nv_debug(pmu, "app_dmem_offset = 0x%08x\n", >> desc->app_dmem_offset); >> + nv_debug(pmu, "app_resident_code_offset = 0x%08x\n", >> + desc->app_resident_code_offset); >> + nv_debug(pmu, "app_resident_code_size = 0x%08x\n", >> + desc->app_resident_code_size); >> + nv_debug(pmu, "app_resident_data_offset = 0x%08x\n", >> + desc->app_resident_data_offset); >> + nv_debug(pmu, "app_resident_data_size = 0x%08x\n", >> + desc->app_resident_data_size); >> + nv_debug(pmu, "nb_overlays = %d\n", desc->nb_overlays); >> + >> + nv_debug(pmu, "compressed = %u\n", desc->compressed); >> +} >> + >> +static int >> gk20a_pmu_dvfs_target(struct gk20a_pmu_priv *priv, int *state) >> { >> struct nvkm_clk *clk = nvkm_clk(priv); >> @@ -160,40 +396,529 @@ resched: >> } >> >> static int >> -gk20a_pmu_fini(struct nvkm_object *object, bool suspend) >> +gk20a_pmu_enable_hw(struct gk20a_pmu_priv *priv, struct nvkm_mc *pmc, >> bool enable) >> { >> - struct nvkm_pmu *pmu = (void *)object; >> - struct gk20a_pmu_priv *priv = (void *)pmu; >> + if (enable) { >> + nv_mask(pmc, 0x000200, 0x00002000, 
0x00002000); >> + nv_rd32(pmc, 0x00000200); >> + if (nv_wait(priv, 0x0010a10c, 0x00000006, 0x00000000)) >> + return 0; >> + nv_mask(pmc, 0x00000200, 0x2000, 0x00000000); >> + nv_error(priv, "Falcon mem scrubbing timeout\n"); >> + return -ETIMEDOUT; >> + } else { >> + nv_mask(pmc, 0x00000200, 0x2000, 0x00000000); >> + return 0; >> + } >> +} >> +static void >> +gk20a_pmu_enable_irq(struct gk20a_pmu_priv *priv, struct nvkm_mc *pmc, >> bool enable) >> +{ >> + if (enable) { >> + nv_debug(priv, "enable pmu irq\n"); >> + nv_wr32(priv, 0x0010a010, 0xff); >> + nv_mask(pmc, 0x00000640, 0x1000000, 0x1000000); >> + nv_mask(pmc, 0x00000644, 0x1000000, 0x1000000); >> + } else { >> + nv_debug(priv, "disable pmu irq\n"); >> + nv_mask(pmc, 0x00000640, 0x1000000, 0x00000000); >> + nv_mask(pmc, 0x00000644, 0x1000000, 0x00000000); >> + nv_wr32(priv, 0x0010a014, 0xff); >> + } >> >> - nvkm_timer_alarm_cancel(priv, &priv->alarm); >> +} >> >> - return nvkm_subdev_fini(&pmu->base, suspend); >> +static int >> +gk20a_pmu_idle(struct gk20a_pmu_priv *priv) >> +{ >> + if (!nv_wait(priv, 0x0010a04c, 0x0000ffff, 0x00000000)) { >> + nv_error(priv, "timeout waiting pmu idle\n"); >> + return -EBUSY; >> + } >> + >> + return 0; >> +} >> + >> +static int >> +gk20a_pmu_enable(struct gk20a_pmu_priv *priv, struct nvkm_mc *pmc, bool >> enable) >> +{ >> + u32 pmc_enable; >> + int err; >> + >> + if (enable) { >> + err = gk20a_pmu_enable_hw(priv, pmc, true); >> + if (err) >> + return err; >> + >> + err = gk20a_pmu_idle(priv); >> + if (err) >> + return err; >> + >> + gk20a_pmu_enable_irq(priv, pmc, true); >> + } else { >> + pmc_enable = nv_rd32(pmc, 0x200); >> + if ((pmc_enable & 0x2000) != 0x0) { >> + gk20a_pmu_enable_irq(priv, pmc, false); >> + gk20a_pmu_enable_hw(priv, pmc, false); >> + } >> + } >> + >> + return 0; >> +} >> + >> +static void >> +gk20a_pmu_copy_to_dmem(struct gk20a_pmu_priv *priv, u32 dst, u8 *src, u32 >> size, >> + u8 port) >> +{ >> + u32 i, words, bytes; >> + u32 data, addr_mask; >> + u32 
*src_u32 = (u32 *)src; >> + >> + if (size == 0) { >> + nv_error(priv, "size is zero\n"); >> + goto out; >> + } >> + >> + if (dst & 0x3) { >> + nv_error(priv, "dst (0x%08x) not 4-byte aligned\n", dst); >> + goto out; >> + } >> + >> + mutex_lock(&priv->pmu_copy_lock); >> + words = size >> 2; >> + bytes = size & 0x3; >> + addr_mask = 0xfffc; >> + dst &= addr_mask; >> + >> + nv_wr32(priv, (0x10a1c0 + (port * 8)), (dst | (0x1 << 24))); >> + >> + for (i = 0; i < words; i++) { >> + nv_wr32(priv, (0x10a1c4 + (port * 8)), src_u32[i]); >> + nv_debug(priv, "0x%08x\n", src_u32[i]); >> + } >> + >> + if (bytes > 0) { >> + data = 0; >> + for (i = 0; i < bytes; i++) >> + ((u8 *)&data)[i] = src[(words << 2) + i]; >> + nv_wr32(priv, (0x10a1c4 + (port * 8)), data); >> + nv_debug(priv, "0x%08x\n", data); >> + } >> + >> + data = nv_rd32(priv, (0x10a1c0 + (port * 8))) & addr_mask; >> + size = ALIGN(size, 4); >> + if (data != dst + size) { >> + nv_error(priv, "copy failed.... bytes written %d, expected >> %d", >> + data - dst, >> size); >> + } >> + mutex_unlock(&priv->pmu_copy_lock); >> +out: >> + nv_debug(priv, "exit %s\n", __func__); >> +} >> + >> +static void >> +gk20a_copy_from_dmem(struct gk20a_pmu_priv *priv, u32 src, u8 *dst, u32 >> size, >> + u8 port) >> +{ >> + u32 i, words, bytes; >> + u32 data, addr_mask; >> + u32 *dst_u32 = (u32 *)dst; >> + >> + if (size == 0) { >> + nv_error(priv, "size is zero\n"); >> + goto out; >> + } >> + >> + if (src & 0x3) { >> + nv_error(priv, "src (0x%08x) not 4-byte aligned\n", src); >> + goto out; >> + } >> + >> + mutex_lock(&priv->pmu_copy_lock); >> + >> + words = size >> 2; >> + bytes = size & 0x3; >> + >> + addr_mask = 0xfffc; >> + >> + src &= addr_mask; >> + >> + nv_wr32(priv, (0x10a1c0 + (port * 8)), (src | (0x1 << 25))); >> + >> + for (i = 0; i < words; i++) { >> + dst_u32[i] = nv_rd32(priv, (0x0010a1c4 + port * 8)); >> + nv_debug(priv, "0x%08x\n", dst_u32[i]); >> + } >> + if (bytes > 0) { >> + data = nv_rd32(priv, (0x0010a1c4 + port * 8)); 
>> + nv_debug(priv, "0x%08x\n", data); >> + >> + for (i = 0; i < bytes; i++) >> + dst[(words << 2) + i] = ((u8 *)&data)[i]; >> + } >> + mutex_unlock(&priv->pmu_copy_lock); >> +out: >> + nv_debug(priv, "exit %s\n", __func__); >> +} >> + >> +static int >> +gk20a_pmu_process_init_msg(struct gk20a_pmu_priv *priv, struct pmu_msg >> *msg) >> +{ >> + struct pmu_init_msg_pmu_gk20a *init; >> + u32 tail; >> + >> + tail = nv_rd32(priv, 0x0010a4cc); >> + >> + gk20a_copy_from_dmem(priv, tail, (u8 *)&msg->hdr, >> PMU_MSG_HDR_SIZE, 0); >> + >> + if (msg->hdr.unit_id != PMU_UNIT_INIT) { >> + nv_error(priv, "expecting init msg\n"); >> + return -EINVAL; >> + } >> + >> + gk20a_copy_from_dmem(priv, tail + PMU_MSG_HDR_SIZE, >> + (u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0); >> + >> + if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) { >> + nv_error(priv, "expecting init msg\n"); >> + return -EINVAL; >> + } >> + >> + tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT); >> + nv_wr32(priv, 0x0010a4cc, tail); >> + init = &msg->msg.init.pmu_init_gk20a; >> + priv->pmu_ready = true; >> + priv->pmu_state = PMU_STATE_INIT_RECEIVED; >> + nv_debug(priv, "init msg processed\n"); >> + return 0; >> +} >> + >> +static void >> +gk20a_pmu_process_message(struct work_struct *work) >> +{ >> + struct nvkm_pmu *pmu = container_of(work, struct nvkm_pmu, >> recv.work); >> + struct gk20a_pmu_priv *priv = to_gk20a_priv(pmu); >> + struct pmu_msg msg; >> + struct nvkm_mc *pmc = nvkm_mc(pmu); >> + >> + mutex_lock(&priv->isr_mutex); >> + if (unlikely(!priv->pmu_ready)) { >> + nv_debug(pmu, "processing init msg\n"); >> + gk20a_pmu_process_init_msg(priv, &msg); >> + mutex_unlock(&priv->isr_mutex); >> + gk20a_pmu_enable_irq(priv, pmc, true); >> + } else { >> + mutex_unlock(&priv->isr_mutex); >> + } >> +} >> + >> +static int >> +gk20a_pmu_init_vm(struct gk20a_pmu_priv *priv, const struct firmware *fw) >> +{ >> + int ret = 0; >> + u32 *ucode_image; >> + struct pmu_ucode_desc *desc = (struct pmu_ucode_desc 
*)fw->data; >> + int i; >> + struct nvkm_pmu_priv_vm *pmuvm = &priv->pmuvm; >> + struct nvkm_device *device = nv_device(&priv->base); >> + struct nvkm_vm *vm; >> + const u64 pmu_area_len = 300*1024; >> + >> + /* mem for inst blk*/ >> + ret = nvkm_gpuobj_new(nv_object(priv), NULL, 0x1000, 0, 0, >> &pmuvm->mem); >> + if (ret) >> + return ret; >> + >> + /* mem for pgd*/ >> + ret = nvkm_gpuobj_new(nv_object(priv), NULL, 0x8000, 0, 0, >> &pmuvm->pgd); >> + if (ret) >> + return ret; >> + >> + /*allocate virtual memory range*/ >> + ret = nvkm_vm_new(device, 0, pmu_area_len, 0, &vm); >> + if (ret) >> + return ret; >> + >> + atomic_inc(&vm->engref[NVDEV_SUBDEV_PMU]); >> + >> + /* update VM with pgd */ >> + ret = nvkm_vm_ref(vm, &pmuvm->vm, pmuvm->pgd); >> + if (ret) >> + return ret; >> + >> + /*update pgd in inst blk */ >> + nv_wo32(pmuvm->mem, 0x0200, lower_32_bits(pmuvm->pgd->addr)); >> + nv_wo32(pmuvm->mem, 0x0204, upper_32_bits(pmuvm->pgd->addr)); >> + nv_wo32(pmuvm->mem, 0x0208, lower_32_bits(pmu_area_len - 1)); >> + nv_wo32(pmuvm->mem, 0x020c, upper_32_bits(pmu_area_len - 1)); >> + >> + /* allocate memory for pmu fw to be copied to*/ >> + ret = nvkm_gpuobj_new(nv_object(priv), NULL, >> GK20A_PMU_UCODE_SIZE_MAX, >> + 0x1000, 0, &priv->ucode.obj); >> + if (ret) >> + return ret; >> + >> + ucode_image = (u32 *)((u8 *)desc + desc->descriptor_size); >> + for (i = 0; i < (desc->app_start_offset + desc->app_size); i += 4) >> + nv_wo32(priv->ucode.obj, i, ucode_image[i/4]); >> + >> + /* map allocated memory into GMMU */ >> + ret = nvkm_gpuobj_map_vm(priv->ucode.obj, vm, NV_MEM_ACCESS_RW, >> + &priv->ucode.vma); >> + if (ret) >> + return ret; >> + >> + return ret; >> +} >> + >> +static int >> +gk20a_init_pmu_setup_sw(struct gk20a_pmu_priv *priv) >> +{ >> + struct nvkm_pmu_priv_vm *pmuvm = &priv->pmuvm; >> + int ret = 0; >> + >> + INIT_WORK(&priv->base.recv.work, gk20a_pmu_process_message); >> + >> + ret = nvkm_gpuobj_new(nv_object(priv), NULL, >> GK20A_PMU_TRACE_BUFSIZE, >> + 
0, 0, &priv->trace_buf.obj); >> + if (ret) >> + return ret; >> + >> + ret = nvkm_gpuobj_map_vm(nv_gpuobj(priv->trace_buf.obj), >> pmuvm->vm, >> + NV_MEM_ACCESS_RW, >> &priv->trace_buf.vma); >> + if (ret) >> + return ret; >> + >> + return 0; >> +} >> + >> +static int >> +gk20a_pmu_bootstrap(struct gk20a_pmu_priv *priv) >> +{ >> + struct pmu_ucode_desc *desc = priv->desc; >> + u32 addr_code, addr_data, addr_load; >> + u32 i, blocks, addr_args; >> + struct pmu_cmdline_args_gk20a cmdline_args; >> + struct nvkm_pmu_priv_vm *pmuvm = &priv->pmuvm; >> + >> + nv_mask(priv, 0x0010a048, 0x01, 0x01); >> + /*bind the address*/ >> + nv_wr32(priv, 0x0010a480, >> + pmuvm->mem->addr >> 12 | >> + 0x1 << 30 | >> + 0x20000000); >> + >> + /* TBD: load all other surfaces */ >> + cmdline_args.falc_trace_size = GK20A_PMU_TRACE_BUFSIZE; >> + cmdline_args.falc_trace_dma_base = >> + lower_32_bits(priv->trace_buf.vma.offset >> >> 8); >> + cmdline_args.falc_trace_dma_idx = GK20A_PMU_DMAIDX_VIRT; >> + cmdline_args.cpu_freq_hz = 204; >> + cmdline_args.secure_mode = 0; >> + >> + addr_args = (nv_rd32(priv, 0x0010a108) >> 9) & 0x1ff; >> + addr_args = addr_args << GK20A_PMU_DMEM_BLKSIZE2; >> + addr_args -= sizeof(struct pmu_cmdline_args_gk20a); >> + nv_debug(priv, "initiating copy to dmem\n"); >> + gk20a_pmu_copy_to_dmem(priv, addr_args, >> + (u8 *)&cmdline_args, >> + sizeof(struct pmu_cmdline_args_gk20a), 0); >> + >> + nv_wr32(priv, 0x0010a1c0, 0x1 << 24); >> + >> + addr_code = lower_32_bits((priv->ucode.vma.offset + >> + desc->app_start_offset + >> + desc->app_resident_code_offset) >> 8); >> + >> + addr_data = lower_32_bits((priv->ucode.vma.offset + >> + desc->app_start_offset + >> + desc->app_resident_data_offset) >> 8); >> + >> + addr_load = lower_32_bits((priv->ucode.vma.offset + >> + desc->bootloader_start_offset) >> 8); >> + >> + nv_wr32(priv, 0x0010a1c4, GK20A_PMU_DMAIDX_UCODE); >> + nv_debug(priv, "0x%08x\n", GK20A_PMU_DMAIDX_UCODE); >> + nv_wr32(priv, 0x0010a1c4, (addr_code)); >> + 
nv_debug(priv, "0x%08x\n", (addr_code)); >> + nv_wr32(priv, 0x0010a1c4, desc->app_size); >> + nv_debug(priv, "0x%08x\n", desc->app_size); >> + nv_wr32(priv, 0x0010a1c4, desc->app_resident_code_size); >> + nv_debug(priv, "0x%08x\n", desc->app_resident_code_size); >> + nv_wr32(priv, 0x0010a1c4, desc->app_imem_entry); >> + nv_debug(priv, "0x%08x\n", desc->app_imem_entry); >> + nv_wr32(priv, 0x0010a1c4, (addr_data)); >> + nv_debug(priv, "0x%08x\n", (addr_data)); >> + nv_wr32(priv, 0x0010a1c4, desc->app_resident_data_size); >> + nv_debug(priv, "0x%08x\n", desc->app_resident_data_size); >> + nv_wr32(priv, 0x0010a1c4, (addr_code)); >> + nv_debug(priv, "0x%08x\n", (addr_code)); >> + nv_wr32(priv, 0x0010a1c4, 0x1); >> + nv_debug(priv, "0x%08x\n", 1); >> + nv_wr32(priv, 0x0010a1c4, addr_args); >> + nv_debug(priv, "0x%08x\n", addr_args); >> + >> + nv_wr32(priv, 0x0010a110, >> + (addr_load) - (desc->bootloader_imem_offset >> 8)); >> + >> + blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8; >> + >> + for (i = 0; i < blocks; i++) { >> + nv_wr32(priv, 0x0010a114, >> + desc->bootloader_imem_offset + (i << 8)); >> + nv_wr32(priv, 0x0010a11c, >> + desc->bootloader_imem_offset + (i << 8)); >> + nv_wr32(priv, 0x0010a118, >> + 0x01 << 4 | >> + 0x06 << 8 | >> + ((GK20A_PMU_DMAIDX_UCODE & 0x07) << 12)); >> + } >> + >> + nv_wr32(priv, 0x0010a104, (desc->bootloader_entry_point)); >> + nv_wr32(priv, 0x0010a100, 0x1 << 1); >> + nv_wr32(priv, 0x0010a080, desc->app_version); >> + >> + return 0; >> +} >> + >> +static int >> +gk20a_init_pmu_setup_hw1(struct gk20a_pmu_priv *priv, struct nvkm_mc >> *pmc) >> +{ >> + int err; >> + >> + mutex_lock(&priv->isr_mutex); >> + err = gk20a_pmu_enable(priv, pmc, true); >> + priv->isr_enabled = (err == 0); >> + mutex_unlock(&priv->isr_mutex); >> + if (err) >> + return err; >> + >> + /* setup apertures - virtual */ >> + nv_wr32(priv, 0x10a600 + 0 * 4, 0x0); >> + nv_wr32(priv, 0x10a600 + 1 * 4, 0x0); >> + /* setup apertures - physical */ >> + nv_wr32(priv, 
0x10a600 + 2 * 4, 0x4 | 0x0); >> + nv_wr32(priv, 0x10a600 + 3 * 4, 0x4 | 0x1); >> + nv_wr32(priv, 0x10a600 + 4 * 4, 0x4 | 0x2); >> + >> + /* TBD: load pmu ucode */ >> + err = gk20a_pmu_bootstrap(priv); >> + if (err) >> + return err; >> + >> + return 0; >> +} >> + >> + >> +static void >> +gk20a_pmu_intr(struct nvkm_subdev *subdev) >> +{ >> + struct gk20a_pmu_priv *priv = to_gk20a_priv(nvkm_pmu(subdev)); >> + struct nvkm_mc *pmc = nvkm_mc(priv); >> + u32 intr, mask; >> + >> + if (!priv->isr_enabled) >> + return; >> + >> + mask = nv_rd32(priv, 0x0010a018) & nv_rd32(priv, 0x0010a01c); >> + >> + intr = nv_rd32(priv, 0x0010a008) & mask; >> + >> + nv_debug(priv, "received falcon interrupt: 0x%08x\n", intr); >> + gk20a_pmu_enable_irq(priv, pmc, false); >> + >> + if (!intr || priv->pmu_state == PMU_STATE_OFF) { >> + nv_wr32(priv, 0x0010a004, intr); >> + nv_error(priv, "pmu state off\n"); >> + gk20a_pmu_enable_irq(priv, pmc, true); >> + } >> + >> + if (intr & 0x10) >> + nv_error(priv, "pmu halt intr not implemented\n"); >> + >> + if (intr & 0x20) { >> + nv_error(priv, "exterr interrupt not impl..Clear >> interrupt\n"); >> + nv_mask(priv, 0x0010a16c, (0x1 << 31), 0x00000000); >> + } >> + >> + if (intr & 0x40) { >> + nv_debug(priv, "scheduling work\n"); >> + schedule_work(&priv->base.recv.work); >> + } >> + >> + nv_wr32(priv, 0x0010a004, intr); >> + nv_debug(priv, "irq handled\n"); >> +} >> + >> +static void >> +gk20a_pmu_pgob(struct nvkm_pmu *pmu, bool enable) >> +{ >> } >> >> static int >> gk20a_pmu_init(struct nvkm_object *object) >> { >> - struct nvkm_pmu *pmu = (void *)object; >> - struct gk20a_pmu_priv *priv = (void *)pmu; >> + struct gk20a_pmu_priv *priv = (void *)object; >> + struct nvkm_mc *pmc = nvkm_mc(object); >> int ret; >> >> - ret = nvkm_subdev_init(&pmu->base); >> + ret = nvkm_subdev_init(&priv->base.base); >> if (ret) >> return ret; >> >> - pmu->pgob = nvkm_pmu_pgob; >> + priv->pmu_state = PMU_STATE_STARTING; >> + ret = gk20a_init_pmu_setup_hw1(priv, pmc); >> 
+ if (ret) >> + return ret; >> + >> + nv_wr32(priv, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001); >> + nv_wr32(priv, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002); >> + nv_wr32(priv, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003); >> >> - /* init pwr perf counter */ >> - nv_wr32(pmu, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001); >> - nv_wr32(pmu, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002); >> - nv_wr32(pmu, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003); >> + nvkm_timer_alarm(priv, 2000000000, &priv->alarm); >> >> - nvkm_timer_alarm(pmu, 2000000000, &priv->alarm); >> return ret; >> } >> >> +static int >> +gk20a_pmu_fini(struct nvkm_object *object, bool suspend) >> +{ >> + struct gk20a_pmu_priv *priv = (void *)object; >> + struct nvkm_mc *pmc = nvkm_mc(object); >> + >> + nvkm_timer_alarm_cancel(priv, &priv->alarm); >> + >> + cancel_work_sync(&priv->base.recv.work); >> + >> + mutex_lock(&priv->isr_mutex); >> + gk20a_pmu_enable(priv, pmc, false); >> + priv->isr_enabled = false; >> + mutex_unlock(&priv->isr_mutex); >> + >> + priv->pmu_state = PMU_STATE_OFF; >> + priv->pmu_ready = false; >> + nv_wr32(priv, 0x10a014, 0x00000060); >> + >> + return nvkm_subdev_fini(&priv->base.base, suspend); >> +} >> + >> +static void >> +gk20a_pmu_dtor(struct nvkm_object *object) >> +{ >> + struct gk20a_pmu_priv *priv = (void *)object; >> + >> + nvkm_gpuobj_unmap(&priv->trace_buf.vma); >> + nvkm_gpuobj_ref(NULL, &priv->trace_buf.obj); >> + >> + nvkm_gpuobj_unmap(&priv->ucode.vma); >> + nvkm_gpuobj_ref(NULL, &priv->ucode.obj); >> + nvkm_vm_ref(NULL, &priv->pmuvm.vm, priv->pmuvm.pgd); >> + nvkm_gpuobj_ref(NULL, &priv->pmuvm.pgd); >> + nvkm_gpuobj_ref(NULL, &priv->pmuvm.mem); >> +} >> + >> static struct gk20a_pmu_dvfs_data >> -gk20a_dvfs_data= { >> +gk20a_dvfs_data = { >> .p_load_target = 70, >> .p_load_max = 90, >> .p_smooth = 1, >> @@ -205,6 +930,9 @@ gk20a_pmu_ctor(struct nvkm_object *parent, struct >> nvkm_object *engine, >> struct nvkm_object **pobject) >> { >> struct gk20a_pmu_priv *priv; >> + struct 
nvkm_pmu *pmu; >> + struct nvkm_mc *pmc; >> + const struct firmware *pmufw = NULL; >> int ret; >> >> ret = nvkm_pmu_create(parent, engine, oclass, &priv); >> @@ -212,10 +940,47 @@ gk20a_pmu_ctor(struct nvkm_object *parent, struct >> nvkm_object *engine, >> if (ret) >> return ret; >> >> + mutex_init(&priv->isr_mutex); >> + mutex_init(&priv->pmu_copy_lock); >> priv->data = &gk20a_dvfs_data; >> + pmu = &priv->base; >> + pmc = nvkm_mc(pmu); >> + nv_subdev(pmu)->intr = gk20a_pmu_intr; >> >> + ret = gk20a_pmu_load_firmware(pmu, &pmufw); >> + if (ret < 0) { >> + nv_error(priv, "failed to load pmu fimware\n"); >> + return ret; >> + } >> + >> + ret = gk20a_pmu_init_vm(priv, pmufw); >> + if (ret < 0) { >> + nv_error(priv, "failed to map pmu fw to va space\n"); >> + goto err; >> + } >> + >> + priv->desc = (struct pmu_ucode_desc *)pmufw->data; >> + gk20a_pmu_dump_firmware_info(pmu, pmufw); >> + >> + if (priv->desc->app_version != APP_VERSION_GK20A) { >> + nv_error(priv, "PMU version unsupported: %d\n", >> + >> priv->desc->app_version); >> + ret = -EINVAL; >> + goto err; >> + } >> + >> + ret = gk20a_init_pmu_setup_sw(priv); >> + if (ret) >> + goto err; >> + >> + pmu->pgob = nvkm_pmu_pgob; >> nvkm_alarm_init(&priv->alarm, gk20a_pmu_dvfs_work); >> + >> return 0; >> + >> +err: >> + gk20a_pmu_release_firmware(pmu, pmufw); >> + return ret; >> } >> >> struct nvkm_oclass * >> @@ -223,8 +988,10 @@ gk20a_pmu_oclass = &(struct nvkm_pmu_impl) { >> .base.handle = NV_SUBDEV(PMU, 0xea), >> .base.ofuncs = &(struct nvkm_ofuncs) { >> .ctor = gk20a_pmu_ctor, >> - .dtor = _nvkm_pmu_dtor, >> + .dtor = gk20a_pmu_dtor, >> .init = gk20a_pmu_init, >> .fini = gk20a_pmu_fini, >> }, >> + .pgob = gk20a_pmu_pgob, >> }.base; >> + >> > > _______________________________________________ > Nouveau mailing list > Nouveau@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/nouveau -- To unsubscribe from this list: send the line "unsubscribe linux-tegra" in the body of a message to 
majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Apr 13, 2015 at 3:38 AM, Alexandre Courbot <acourbot@nvidia.com> wrote: > From: Deepak Goyal <dgoyal@nvidia.com> > > - Maps PMU firmware into PMU virtual memory. > - Copy bootloader into PMU memory and start it. > - Allow the PMU to interact with HOST via interrupts. > > PMU after successful configurations (to follow after this patch) will: > 1.Autonomously power gate graphics engine when not in use.It will save > us a lot of power. > 2.Provide better way to scale frequencies by reporting Perf counters. > 3.Be critical for GPU functionality as future GPUs secure some register > & mem accesses involved in context switch. > > Signed-off-by: Deepak Goyal <dgoyal@nvidia.com> > Signed-off-by: Alexandre Courbot <acourbot@nvidia.com> > --- > Changes since v3: > - Cleaned formatting, renamed variables according to Nouveau standards > - (Hopefully) fixed init/fini sequence > - Removed a few more unneeded variables > > drm/nouveau/nvkm/subdev/pmu/gk20a.c | 803 +++++++++++++++++++++++++++++++++++- > 1 file changed, 785 insertions(+), 18 deletions(-) > > diff --git a/drm/nouveau/nvkm/subdev/pmu/gk20a.c b/drm/nouveau/nvkm/subdev/pmu/gk20a.c > index 594f746e68f2..c206ec5e558a 100644 > --- a/drm/nouveau/nvkm/subdev/pmu/gk20a.c > +++ b/drm/nouveau/nvkm/subdev/pmu/gk20a.c > @@ -1,5 +1,5 @@ > /* > - * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. > + * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. > * > * Permission is hereby granted, free of charge, to any person obtaining a > * copy of this software and associated documentation files (the "Software"), > @@ -19,14 +19,186 @@ > * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > * DEALINGS IN THE SOFTWARE. 
> */ > -#include "priv.h" > > +#include "priv.h" > +#include <core/client.h> > +#include <core/gpuobj.h> > +#include <subdev/bar.h> > +#include <subdev/fb.h> > +#include <subdev/mc.h> > +#include <subdev/timer.h> > +#include <subdev/mmu.h> > +#include <subdev/pmu.h> > +#include <core/object.h> > +#include <core/device.h> > +#include <linux/delay.h> > +#include <linux/firmware.h> > #include <subdev/clk.h> > #include <subdev/timer.h> > #include <subdev/volt.h> > > +#define APP_VERSION_GK20A 17997577 > +#define GK20A_PMU_UCODE_SIZE_MAX (256 * 1024) > +#define PMU_QUEUE_COUNT 5 > + > +#define GK20A_PMU_TRACE_BUFSIZE 0x4000 /* 4K */ Hmm that's 16K not 4K.. Stéphane > +#define GK20A_PMU_DMEM_BLKSIZE2 8 > +#define GK20A_PMU_UCODE_NB_MAX_OVERLAY 32 > +#define GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH 64 > + > +#define PMU_UNIT_REWIND (0x00) > +#define PMU_UNIT_PG (0x03) > +#define PMU_UNIT_INIT (0x07) > +#define PMU_UNIT_PERFMON (0x12) > +#define PMU_UNIT_THERM (0x1B) > +#define PMU_UNIT_RC (0x1F) > +#define PMU_UNIT_NULL (0x20) > +#define PMU_UNIT_END (0x23) > +#define PMU_UNIT_TEST_START (0xFE) > +#define PMU_UNIT_END_SIM (0xFF) > +#define PMU_UNIT_TEST_END (0xFF) > + > +#define PMU_UNIT_ID_IS_VALID(id) \ > + (((id) < PMU_UNIT_END) || ((id) >= PMU_UNIT_TEST_START)) > +#define PMU_DMEM_ALIGNMENT (4) > + > #define BUSY_SLOT 0 > #define CLK_SLOT 7 > +#define GK20A_PMU_UCODE_IMAGE "gpmu_ucode.bin" > + > +/*Choices for DMA to use*/ > +enum { > + GK20A_PMU_DMAIDX_UCODE = 0, > + GK20A_PMU_DMAIDX_VIRT = 1, > + GK20A_PMU_DMAIDX_PHYS_VID = 2, > + GK20A_PMU_DMAIDX_PHYS_SYS_COH = 3, > + GK20A_PMU_DMAIDX_PHYS_SYS_NCOH = 4, > + GK20A_PMU_DMAIDX_RSVD = 5, > + GK20A_PMU_DMAIDX_PELPG = 6, > + GK20A_PMU_DMAIDX_END = 7 > +}; > + > +struct pmu_buf_desc { > + struct nvkm_gpuobj *obj; > + struct nvkm_vma vma; > + size_t size; > +}; > + > +struct nvkm_pmu_priv_vm { > + struct nvkm_gpuobj *mem; > + struct nvkm_gpuobj *pgd; > + struct nvkm_vm *vm; > +}; > + > +/*Choices for pmu_state*/ > +enum { > + 
PMU_STATE_OFF, /*0 PMU is off */ > + PMU_STATE_STARTING, /*1 PMU is on, but not booted */ > + PMU_STATE_INIT_RECEIVED /*2 PMU init message received */ > +}; > + > +struct pmu_mem_gk20a { > + u32 dma_base; > + u8 dma_offset; > + u8 dma_idx; > + u16 fb_size; > +}; > + > +struct pmu_cmdline_args_gk20a { > + u32 cpu_freq_hz; /* Frequency of the clock driving PMU */ > + u32 falc_trace_size; /* falctrace buffer size (bytes) */ > + u32 falc_trace_dma_base; /* 256-byte block address */ > + u32 falc_trace_dma_idx; /* dmaIdx for DMA operations */ > + u8 secure_mode; > + struct pmu_mem_gk20a gc6_ctx; /* dmem offset of gc6 context */ > +}; > + > +/*pmu ucode descriptor*/ > +struct pmu_ucode_desc { > + u32 descriptor_size; > + u32 image_size; > + u32 tools_version; > + u32 app_version; > + char date[GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH]; > + u32 bootloader_start_offset; > + u32 bootloader_size; > + u32 bootloader_imem_offset; > + u32 bootloader_entry_point; > + u32 app_start_offset; > + u32 app_size; > + u32 app_imem_offset; > + u32 app_imem_entry; > + u32 app_dmem_offset; > + u32 app_resident_code_offset; > + u32 app_resident_code_size; > + u32 app_resident_data_offset; > + u32 app_resident_data_size; > + u32 nb_overlays; > + struct {u32 start; u32 size; } load_ovl[GK20A_PMU_UCODE_NB_MAX_OVERLAY]; > + u32 compressed; > +}; > + > +/*pmu msg header*/ > +struct pmu_hdr { > + u8 unit_id; > + u8 size; > + u8 ctrl_flags; > + u8 seq_id; > +}; > + > +#define PMU_MSG_HDR_SIZE sizeof(struct pmu_hdr) > + > +enum { > + PMU_INIT_MSG_TYPE_PMU_INIT = 0, > +}; > + > +/*pmu init msg format*/ > +struct pmu_init_msg_pmu_gk20a { > + u8 msg_type; > + u8 pad; > + u16 os_debug_entry_point; > + > + struct { > + u16 size; > + u16 offset; > + u8 index; > + u8 pad; > + } queue_info[PMU_QUEUE_COUNT]; > + > + u16 sw_managed_area_offset; > + u16 sw_managed_area_size; > +}; > + > +/*pmu init msg format*/ > +struct pmu_init_msg { > + union { > + u8 msg_type; > + struct pmu_init_msg_pmu_gk20a pmu_init_gk20a; > 
+ }; > +}; > + > +enum { > + PMU_RC_MSG_TYPE_UNHANDLED_CMD = 0, > +}; > + > +struct pmu_rc_msg_unhandled_cmd { > + u8 msg_type; > + u8 unit_id; > +}; > + > +struct pmu_rc_msg { > + u8 msg_type; > + struct pmu_rc_msg_unhandled_cmd unhandled_cmd; > +}; > + > +/*pmu generic msg format*/ > +struct pmu_msg { > + struct pmu_hdr hdr; > + union { > + struct pmu_init_msg init; > + struct pmu_rc_msg rc; > + } msg; > +}; > > struct gk20a_pmu_dvfs_data { > int p_load_target; > @@ -39,8 +211,19 @@ struct gk20a_pmu_priv { > struct nvkm_pmu base; > struct nvkm_alarm alarm; > struct gk20a_pmu_dvfs_data *data; > + struct pmu_ucode_desc *desc; > + struct pmu_buf_desc ucode; > + struct pmu_buf_desc trace_buf; > + struct mutex pmu_copy_lock; > + bool pmu_ready; > + int pmu_state; > + struct nvkm_pmu_priv_vm pmuvm; > + struct mutex isr_mutex; > + bool isr_enabled; > }; > > +#define to_gk20a_priv(ptr) container_of(ptr, struct gk20a_pmu_priv, base) > + > struct gk20a_pmu_dvfs_dev_status { > unsigned long total; > unsigned long busy; > @@ -48,6 +231,59 @@ struct gk20a_pmu_dvfs_dev_status { > }; > > static int > +gk20a_pmu_load_firmware(struct nvkm_pmu *pmu, const struct firmware **pfw) > +{ > + struct nvkm_device *dev; > + char fw[32]; > + > + dev = nv_device(pmu); > + snprintf(fw, sizeof(fw), "nvidia/tegra124/%s", GK20A_PMU_UCODE_IMAGE); > + return request_firmware(pfw, fw, nv_device_base(dev)); > +} > + > +static void > +gk20a_pmu_release_firmware(struct nvkm_pmu *pmu, const struct firmware *pfw) > +{ > + nv_debug(pmu, "firmware released\n"); > + release_firmware(pfw); > +} > + > +static void > +gk20a_pmu_dump_firmware_info(struct nvkm_pmu *pmu, const struct firmware *fw) > +{ > + struct pmu_ucode_desc *desc = (struct pmu_ucode_desc *)fw->data; > + > + nv_debug(pmu, "GK20A PMU firmware information\n"); > + nv_debug(pmu, "descriptor size = %u\n", desc->descriptor_size); > + nv_debug(pmu, "image size = %u\n", desc->image_size); > + nv_debug(pmu, "app_version = 0x%08x\n", 
desc->app_version); > + nv_debug(pmu, "date = %s\n", desc->date); > + nv_debug(pmu, "bootloader_start_offset = 0x%08x\n", > + desc->bootloader_start_offset); > + nv_debug(pmu, "bootloader_size = 0x%08x\n", desc->bootloader_size); > + nv_debug(pmu, "bootloader_imem_offset = 0x%08x\n", > + desc->bootloader_imem_offset); > + nv_debug(pmu, "bootloader_entry_point = 0x%08x\n", > + desc->bootloader_entry_point); > + nv_debug(pmu, "app_start_offset = 0x%08x\n", desc->app_start_offset); > + nv_debug(pmu, "app_size = 0x%08x\n", desc->app_size); > + nv_debug(pmu, "app_imem_offset = 0x%08x\n", desc->app_imem_offset); > + nv_debug(pmu, "app_imem_entry = 0x%08x\n", desc->app_imem_entry); > + nv_debug(pmu, "app_dmem_offset = 0x%08x\n", desc->app_dmem_offset); > + nv_debug(pmu, "app_resident_code_offset = 0x%08x\n", > + desc->app_resident_code_offset); > + nv_debug(pmu, "app_resident_code_size = 0x%08x\n", > + desc->app_resident_code_size); > + nv_debug(pmu, "app_resident_data_offset = 0x%08x\n", > + desc->app_resident_data_offset); > + nv_debug(pmu, "app_resident_data_size = 0x%08x\n", > + desc->app_resident_data_size); > + nv_debug(pmu, "nb_overlays = %d\n", desc->nb_overlays); > + > + nv_debug(pmu, "compressed = %u\n", desc->compressed); > +} > + > +static int > gk20a_pmu_dvfs_target(struct gk20a_pmu_priv *priv, int *state) > { > struct nvkm_clk *clk = nvkm_clk(priv); > @@ -160,40 +396,529 @@ resched: > } > > static int > -gk20a_pmu_fini(struct nvkm_object *object, bool suspend) > +gk20a_pmu_enable_hw(struct gk20a_pmu_priv *priv, struct nvkm_mc *pmc, bool enable) > { > - struct nvkm_pmu *pmu = (void *)object; > - struct gk20a_pmu_priv *priv = (void *)pmu; > + if (enable) { > + nv_mask(pmc, 0x000200, 0x00002000, 0x00002000); > + nv_rd32(pmc, 0x00000200); > + if (nv_wait(priv, 0x0010a10c, 0x00000006, 0x00000000)) > + return 0; > + nv_mask(pmc, 0x00000200, 0x2000, 0x00000000); > + nv_error(priv, "Falcon mem scrubbing timeout\n"); > + return -ETIMEDOUT; > + } else { > + 
nv_mask(pmc, 0x00000200, 0x2000, 0x00000000); > + return 0; > + } > +} > +static void > +gk20a_pmu_enable_irq(struct gk20a_pmu_priv *priv, struct nvkm_mc *pmc, bool enable) > +{ > + if (enable) { > + nv_debug(priv, "enable pmu irq\n"); > + nv_wr32(priv, 0x0010a010, 0xff); > + nv_mask(pmc, 0x00000640, 0x1000000, 0x1000000); > + nv_mask(pmc, 0x00000644, 0x1000000, 0x1000000); > + } else { > + nv_debug(priv, "disable pmu irq\n"); > + nv_mask(pmc, 0x00000640, 0x1000000, 0x00000000); > + nv_mask(pmc, 0x00000644, 0x1000000, 0x00000000); > + nv_wr32(priv, 0x0010a014, 0xff); > + } > > - nvkm_timer_alarm_cancel(priv, &priv->alarm); > +} > > - return nvkm_subdev_fini(&pmu->base, suspend); > +static int > +gk20a_pmu_idle(struct gk20a_pmu_priv *priv) > +{ > + if (!nv_wait(priv, 0x0010a04c, 0x0000ffff, 0x00000000)) { > + nv_error(priv, "timeout waiting pmu idle\n"); > + return -EBUSY; > + } > + > + return 0; > +} > + > +static int > +gk20a_pmu_enable(struct gk20a_pmu_priv *priv, struct nvkm_mc *pmc, bool enable) > +{ > + u32 pmc_enable; > + int err; > + > + if (enable) { > + err = gk20a_pmu_enable_hw(priv, pmc, true); > + if (err) > + return err; > + > + err = gk20a_pmu_idle(priv); > + if (err) > + return err; > + > + gk20a_pmu_enable_irq(priv, pmc, true); > + } else { > + pmc_enable = nv_rd32(pmc, 0x200); > + if ((pmc_enable & 0x2000) != 0x0) { > + gk20a_pmu_enable_irq(priv, pmc, false); > + gk20a_pmu_enable_hw(priv, pmc, false); > + } > + } > + > + return 0; > +} > + > +static void > +gk20a_pmu_copy_to_dmem(struct gk20a_pmu_priv *priv, u32 dst, u8 *src, u32 size, > + u8 port) > +{ > + u32 i, words, bytes; > + u32 data, addr_mask; > + u32 *src_u32 = (u32 *)src; > + > + if (size == 0) { > + nv_error(priv, "size is zero\n"); > + goto out; > + } > + > + if (dst & 0x3) { > + nv_error(priv, "dst (0x%08x) not 4-byte aligned\n", dst); > + goto out; > + } > + > + mutex_lock(&priv->pmu_copy_lock); > + words = size >> 2; > + bytes = size & 0x3; > + addr_mask = 0xfffc; > + dst &= 
addr_mask; > + > + nv_wr32(priv, (0x10a1c0 + (port * 8)), (dst | (0x1 << 24))); > + > + for (i = 0; i < words; i++) { > + nv_wr32(priv, (0x10a1c4 + (port * 8)), src_u32[i]); > + nv_debug(priv, "0x%08x\n", src_u32[i]); > + } > + > + if (bytes > 0) { > + data = 0; > + for (i = 0; i < bytes; i++) > + ((u8 *)&data)[i] = src[(words << 2) + i]; > + nv_wr32(priv, (0x10a1c4 + (port * 8)), data); > + nv_debug(priv, "0x%08x\n", data); > + } > + > + data = nv_rd32(priv, (0x10a1c0 + (port * 8))) & addr_mask; > + size = ALIGN(size, 4); > + if (data != dst + size) { > + nv_error(priv, "copy failed.... bytes written %d, expected %d", > + data - dst, size); > + } > + mutex_unlock(&priv->pmu_copy_lock); > +out: > + nv_debug(priv, "exit %s\n", __func__); > +} > + > +static void > +gk20a_copy_from_dmem(struct gk20a_pmu_priv *priv, u32 src, u8 *dst, u32 size, > + u8 port) > +{ > + u32 i, words, bytes; > + u32 data, addr_mask; > + u32 *dst_u32 = (u32 *)dst; > + > + if (size == 0) { > + nv_error(priv, "size is zero\n"); > + goto out; > + } > + > + if (src & 0x3) { > + nv_error(priv, "src (0x%08x) not 4-byte aligned\n", src); > + goto out; > + } > + > + mutex_lock(&priv->pmu_copy_lock); > + > + words = size >> 2; > + bytes = size & 0x3; > + > + addr_mask = 0xfffc; > + > + src &= addr_mask; > + > + nv_wr32(priv, (0x10a1c0 + (port * 8)), (src | (0x1 << 25))); > + > + for (i = 0; i < words; i++) { > + dst_u32[i] = nv_rd32(priv, (0x0010a1c4 + port * 8)); > + nv_debug(priv, "0x%08x\n", dst_u32[i]); > + } > + if (bytes > 0) { > + data = nv_rd32(priv, (0x0010a1c4 + port * 8)); > + nv_debug(priv, "0x%08x\n", data); > + > + for (i = 0; i < bytes; i++) > + dst[(words << 2) + i] = ((u8 *)&data)[i]; > + } > + mutex_unlock(&priv->pmu_copy_lock); > +out: > + nv_debug(priv, "exit %s\n", __func__); > +} > + > +static int > +gk20a_pmu_process_init_msg(struct gk20a_pmu_priv *priv, struct pmu_msg *msg) > +{ > + struct pmu_init_msg_pmu_gk20a *init; > + u32 tail; > + > + tail = nv_rd32(priv, 0x0010a4cc); > + 
> + gk20a_copy_from_dmem(priv, tail, (u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0); > + > + if (msg->hdr.unit_id != PMU_UNIT_INIT) { > + nv_error(priv, "expecting init msg\n"); > + return -EINVAL; > + } > + > + gk20a_copy_from_dmem(priv, tail + PMU_MSG_HDR_SIZE, > + (u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0); > + > + if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) { > + nv_error(priv, "expecting init msg\n"); > + return -EINVAL; > + } > + > + tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT); > + nv_wr32(priv, 0x0010a4cc, tail); > + init = &msg->msg.init.pmu_init_gk20a; > + priv->pmu_ready = true; > + priv->pmu_state = PMU_STATE_INIT_RECEIVED; > + nv_debug(priv, "init msg processed\n"); > + return 0; > +} > + > +static void > +gk20a_pmu_process_message(struct work_struct *work) > +{ > + struct nvkm_pmu *pmu = container_of(work, struct nvkm_pmu, recv.work); > + struct gk20a_pmu_priv *priv = to_gk20a_priv(pmu); > + struct pmu_msg msg; > + struct nvkm_mc *pmc = nvkm_mc(pmu); > + > + mutex_lock(&priv->isr_mutex); > + if (unlikely(!priv->pmu_ready)) { > + nv_debug(pmu, "processing init msg\n"); > + gk20a_pmu_process_init_msg(priv, &msg); > + mutex_unlock(&priv->isr_mutex); > + gk20a_pmu_enable_irq(priv, pmc, true); > + } else { > + mutex_unlock(&priv->isr_mutex); > + } > +} > + > +static int > +gk20a_pmu_init_vm(struct gk20a_pmu_priv *priv, const struct firmware *fw) > +{ > + int ret = 0; > + u32 *ucode_image; > + struct pmu_ucode_desc *desc = (struct pmu_ucode_desc *)fw->data; > + int i; > + struct nvkm_pmu_priv_vm *pmuvm = &priv->pmuvm; > + struct nvkm_device *device = nv_device(&priv->base); > + struct nvkm_vm *vm; > + const u64 pmu_area_len = 300*1024; > + > + /* mem for inst blk*/ > + ret = nvkm_gpuobj_new(nv_object(priv), NULL, 0x1000, 0, 0, &pmuvm->mem); > + if (ret) > + return ret; > + > + /* mem for pgd*/ > + ret = nvkm_gpuobj_new(nv_object(priv), NULL, 0x8000, 0, 0, &pmuvm->pgd); > + if (ret) > + return ret; > + > + /*allocate virtual memory 
range*/ > + ret = nvkm_vm_new(device, 0, pmu_area_len, 0, &vm); > + if (ret) > + return ret; > + > + atomic_inc(&vm->engref[NVDEV_SUBDEV_PMU]); > + > + /* update VM with pgd */ > + ret = nvkm_vm_ref(vm, &pmuvm->vm, pmuvm->pgd); > + if (ret) > + return ret; > + > + /*update pgd in inst blk */ > + nv_wo32(pmuvm->mem, 0x0200, lower_32_bits(pmuvm->pgd->addr)); > + nv_wo32(pmuvm->mem, 0x0204, upper_32_bits(pmuvm->pgd->addr)); > + nv_wo32(pmuvm->mem, 0x0208, lower_32_bits(pmu_area_len - 1)); > + nv_wo32(pmuvm->mem, 0x020c, upper_32_bits(pmu_area_len - 1)); > + > + /* allocate memory for pmu fw to be copied to*/ > + ret = nvkm_gpuobj_new(nv_object(priv), NULL, GK20A_PMU_UCODE_SIZE_MAX, > + 0x1000, 0, &priv->ucode.obj); > + if (ret) > + return ret; > + > + ucode_image = (u32 *)((u8 *)desc + desc->descriptor_size); > + for (i = 0; i < (desc->app_start_offset + desc->app_size); i += 4) > + nv_wo32(priv->ucode.obj, i, ucode_image[i/4]); > + > + /* map allocated memory into GMMU */ > + ret = nvkm_gpuobj_map_vm(priv->ucode.obj, vm, NV_MEM_ACCESS_RW, > + &priv->ucode.vma); > + if (ret) > + return ret; > + > + return ret; > +} > + > +static int > +gk20a_init_pmu_setup_sw(struct gk20a_pmu_priv *priv) > +{ > + struct nvkm_pmu_priv_vm *pmuvm = &priv->pmuvm; > + int ret = 0; > + > + INIT_WORK(&priv->base.recv.work, gk20a_pmu_process_message); > + > + ret = nvkm_gpuobj_new(nv_object(priv), NULL, GK20A_PMU_TRACE_BUFSIZE, > + 0, 0, &priv->trace_buf.obj); > + if (ret) > + return ret; > + > + ret = nvkm_gpuobj_map_vm(nv_gpuobj(priv->trace_buf.obj), pmuvm->vm, > + NV_MEM_ACCESS_RW, &priv->trace_buf.vma); > + if (ret) > + return ret; > + > + return 0; > +} > + > +static int > +gk20a_pmu_bootstrap(struct gk20a_pmu_priv *priv) > +{ > + struct pmu_ucode_desc *desc = priv->desc; > + u32 addr_code, addr_data, addr_load; > + u32 i, blocks, addr_args; > + struct pmu_cmdline_args_gk20a cmdline_args; > + struct nvkm_pmu_priv_vm *pmuvm = &priv->pmuvm; > + > + nv_mask(priv, 0x0010a048, 0x01, 0x01); > 
+ /*bind the address*/ > + nv_wr32(priv, 0x0010a480, > + pmuvm->mem->addr >> 12 | > + 0x1 << 30 | > + 0x20000000); > + > + /* TBD: load all other surfaces */ > + cmdline_args.falc_trace_size = GK20A_PMU_TRACE_BUFSIZE; > + cmdline_args.falc_trace_dma_base = > + lower_32_bits(priv->trace_buf.vma.offset >> 8); > + cmdline_args.falc_trace_dma_idx = GK20A_PMU_DMAIDX_VIRT; > + cmdline_args.cpu_freq_hz = 204; > + cmdline_args.secure_mode = 0; > + > + addr_args = (nv_rd32(priv, 0x0010a108) >> 9) & 0x1ff; > + addr_args = addr_args << GK20A_PMU_DMEM_BLKSIZE2; > + addr_args -= sizeof(struct pmu_cmdline_args_gk20a); > + nv_debug(priv, "initiating copy to dmem\n"); > + gk20a_pmu_copy_to_dmem(priv, addr_args, > + (u8 *)&cmdline_args, > + sizeof(struct pmu_cmdline_args_gk20a), 0); > + > + nv_wr32(priv, 0x0010a1c0, 0x1 << 24); > + > + addr_code = lower_32_bits((priv->ucode.vma.offset + > + desc->app_start_offset + > + desc->app_resident_code_offset) >> 8); > + > + addr_data = lower_32_bits((priv->ucode.vma.offset + > + desc->app_start_offset + > + desc->app_resident_data_offset) >> 8); > + > + addr_load = lower_32_bits((priv->ucode.vma.offset + > + desc->bootloader_start_offset) >> 8); > + > + nv_wr32(priv, 0x0010a1c4, GK20A_PMU_DMAIDX_UCODE); > + nv_debug(priv, "0x%08x\n", GK20A_PMU_DMAIDX_UCODE); > + nv_wr32(priv, 0x0010a1c4, (addr_code)); > + nv_debug(priv, "0x%08x\n", (addr_code)); > + nv_wr32(priv, 0x0010a1c4, desc->app_size); > + nv_debug(priv, "0x%08x\n", desc->app_size); > + nv_wr32(priv, 0x0010a1c4, desc->app_resident_code_size); > + nv_debug(priv, "0x%08x\n", desc->app_resident_code_size); > + nv_wr32(priv, 0x0010a1c4, desc->app_imem_entry); > + nv_debug(priv, "0x%08x\n", desc->app_imem_entry); > + nv_wr32(priv, 0x0010a1c4, (addr_data)); > + nv_debug(priv, "0x%08x\n", (addr_data)); > + nv_wr32(priv, 0x0010a1c4, desc->app_resident_data_size); > + nv_debug(priv, "0x%08x\n", desc->app_resident_data_size); > + nv_wr32(priv, 0x0010a1c4, (addr_code)); > + nv_debug(priv, 
"0x%08x\n", (addr_code)); > + nv_wr32(priv, 0x0010a1c4, 0x1); > + nv_debug(priv, "0x%08x\n", 1); > + nv_wr32(priv, 0x0010a1c4, addr_args); > + nv_debug(priv, "0x%08x\n", addr_args); > + > + nv_wr32(priv, 0x0010a110, > + (addr_load) - (desc->bootloader_imem_offset >> 8)); > + > + blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8; > + > + for (i = 0; i < blocks; i++) { > + nv_wr32(priv, 0x0010a114, > + desc->bootloader_imem_offset + (i << 8)); > + nv_wr32(priv, 0x0010a11c, > + desc->bootloader_imem_offset + (i << 8)); > + nv_wr32(priv, 0x0010a118, > + 0x01 << 4 | > + 0x06 << 8 | > + ((GK20A_PMU_DMAIDX_UCODE & 0x07) << 12)); > + } > + > + nv_wr32(priv, 0x0010a104, (desc->bootloader_entry_point)); > + nv_wr32(priv, 0x0010a100, 0x1 << 1); > + nv_wr32(priv, 0x0010a080, desc->app_version); > + > + return 0; > +} > + > +static int > +gk20a_init_pmu_setup_hw1(struct gk20a_pmu_priv *priv, struct nvkm_mc *pmc) > +{ > + int err; > + > + mutex_lock(&priv->isr_mutex); > + err = gk20a_pmu_enable(priv, pmc, true); > + priv->isr_enabled = (err == 0); > + mutex_unlock(&priv->isr_mutex); > + if (err) > + return err; > + > + /* setup apertures - virtual */ > + nv_wr32(priv, 0x10a600 + 0 * 4, 0x0); > + nv_wr32(priv, 0x10a600 + 1 * 4, 0x0); > + /* setup apertures - physical */ > + nv_wr32(priv, 0x10a600 + 2 * 4, 0x4 | 0x0); > + nv_wr32(priv, 0x10a600 + 3 * 4, 0x4 | 0x1); > + nv_wr32(priv, 0x10a600 + 4 * 4, 0x4 | 0x2); > + > + /* TBD: load pmu ucode */ > + err = gk20a_pmu_bootstrap(priv); > + if (err) > + return err; > + > + return 0; > +} > + > + > +static void > +gk20a_pmu_intr(struct nvkm_subdev *subdev) > +{ > + struct gk20a_pmu_priv *priv = to_gk20a_priv(nvkm_pmu(subdev)); > + struct nvkm_mc *pmc = nvkm_mc(priv); > + u32 intr, mask; > + > + if (!priv->isr_enabled) > + return; > + > + mask = nv_rd32(priv, 0x0010a018) & nv_rd32(priv, 0x0010a01c); > + > + intr = nv_rd32(priv, 0x0010a008) & mask; > + > + nv_debug(priv, "received falcon interrupt: 0x%08x\n", intr); > + 
gk20a_pmu_enable_irq(priv, pmc, false); > + > + if (!intr || priv->pmu_state == PMU_STATE_OFF) { > + nv_wr32(priv, 0x0010a004, intr); > + nv_error(priv, "pmu state off\n"); > + gk20a_pmu_enable_irq(priv, pmc, true); > + } > + > + if (intr & 0x10) > + nv_error(priv, "pmu halt intr not implemented\n"); > + > + if (intr & 0x20) { > + nv_error(priv, "exterr interrupt not impl..Clear interrupt\n"); > + nv_mask(priv, 0x0010a16c, (0x1 << 31), 0x00000000); > + } > + > + if (intr & 0x40) { > + nv_debug(priv, "scheduling work\n"); > + schedule_work(&priv->base.recv.work); > + } > + > + nv_wr32(priv, 0x0010a004, intr); > + nv_debug(priv, "irq handled\n"); > +} > + > +static void > +gk20a_pmu_pgob(struct nvkm_pmu *pmu, bool enable) > +{ > } > > static int > gk20a_pmu_init(struct nvkm_object *object) > { > - struct nvkm_pmu *pmu = (void *)object; > - struct gk20a_pmu_priv *priv = (void *)pmu; > + struct gk20a_pmu_priv *priv = (void *)object; > + struct nvkm_mc *pmc = nvkm_mc(object); > int ret; > > - ret = nvkm_subdev_init(&pmu->base); > + ret = nvkm_subdev_init(&priv->base.base); > if (ret) > return ret; > > - pmu->pgob = nvkm_pmu_pgob; > + priv->pmu_state = PMU_STATE_STARTING; > + ret = gk20a_init_pmu_setup_hw1(priv, pmc); > + if (ret) > + return ret; > + > + nv_wr32(priv, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001); > + nv_wr32(priv, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002); > + nv_wr32(priv, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003); > > - /* init pwr perf counter */ > - nv_wr32(pmu, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001); > - nv_wr32(pmu, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002); > - nv_wr32(pmu, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003); > + nvkm_timer_alarm(priv, 2000000000, &priv->alarm); > > - nvkm_timer_alarm(pmu, 2000000000, &priv->alarm); > return ret; > } > > +static int > +gk20a_pmu_fini(struct nvkm_object *object, bool suspend) > +{ > + struct gk20a_pmu_priv *priv = (void *)object; > + struct nvkm_mc *pmc = nvkm_mc(object); > + > + 
nvkm_timer_alarm_cancel(priv, &priv->alarm); > + > + cancel_work_sync(&priv->base.recv.work); > + > + mutex_lock(&priv->isr_mutex); > + gk20a_pmu_enable(priv, pmc, false); > + priv->isr_enabled = false; > + mutex_unlock(&priv->isr_mutex); > + > + priv->pmu_state = PMU_STATE_OFF; > + priv->pmu_ready = false; > + nv_wr32(priv, 0x10a014, 0x00000060); > + > + return nvkm_subdev_fini(&priv->base.base, suspend); > +} > + > +static void > +gk20a_pmu_dtor(struct nvkm_object *object) > +{ > + struct gk20a_pmu_priv *priv = (void *)object; > + > + nvkm_gpuobj_unmap(&priv->trace_buf.vma); > + nvkm_gpuobj_ref(NULL, &priv->trace_buf.obj); > + > + nvkm_gpuobj_unmap(&priv->ucode.vma); > + nvkm_gpuobj_ref(NULL, &priv->ucode.obj); > + nvkm_vm_ref(NULL, &priv->pmuvm.vm, priv->pmuvm.pgd); > + nvkm_gpuobj_ref(NULL, &priv->pmuvm.pgd); > + nvkm_gpuobj_ref(NULL, &priv->pmuvm.mem); > +} > + > static struct gk20a_pmu_dvfs_data > -gk20a_dvfs_data= { > +gk20a_dvfs_data = { > .p_load_target = 70, > .p_load_max = 90, > .p_smooth = 1, > @@ -205,6 +930,9 @@ gk20a_pmu_ctor(struct nvkm_object *parent, struct nvkm_object *engine, > struct nvkm_object **pobject) > { > struct gk20a_pmu_priv *priv; > + struct nvkm_pmu *pmu; > + struct nvkm_mc *pmc; > + const struct firmware *pmufw = NULL; > int ret; > > ret = nvkm_pmu_create(parent, engine, oclass, &priv); > @@ -212,10 +940,47 @@ gk20a_pmu_ctor(struct nvkm_object *parent, struct nvkm_object *engine, > if (ret) > return ret; > > + mutex_init(&priv->isr_mutex); > + mutex_init(&priv->pmu_copy_lock); > priv->data = &gk20a_dvfs_data; > + pmu = &priv->base; > + pmc = nvkm_mc(pmu); > + nv_subdev(pmu)->intr = gk20a_pmu_intr; > > + ret = gk20a_pmu_load_firmware(pmu, &pmufw); > + if (ret < 0) { > + nv_error(priv, "failed to load pmu fimware\n"); > + return ret; > + } > + > + ret = gk20a_pmu_init_vm(priv, pmufw); > + if (ret < 0) { > + nv_error(priv, "failed to map pmu fw to va space\n"); > + goto err; > + } > + > + priv->desc = (struct pmu_ucode_desc 
*)pmufw->data; > + gk20a_pmu_dump_firmware_info(pmu, pmufw); > + > + if (priv->desc->app_version != APP_VERSION_GK20A) { > + nv_error(priv, "PMU version unsupported: %d\n", > + priv->desc->app_version); > + ret = -EINVAL; > + goto err; > + } > + > + ret = gk20a_init_pmu_setup_sw(priv); > + if (ret) > + goto err; > + > + pmu->pgob = nvkm_pmu_pgob; > nvkm_alarm_init(&priv->alarm, gk20a_pmu_dvfs_work); > + > return 0; > + > +err: > + gk20a_pmu_release_firmware(pmu, pmufw); > + return ret; > } > > struct nvkm_oclass * > @@ -223,8 +988,10 @@ gk20a_pmu_oclass = &(struct nvkm_pmu_impl) { > .base.handle = NV_SUBDEV(PMU, 0xea), > .base.ofuncs = &(struct nvkm_ofuncs) { > .ctor = gk20a_pmu_ctor, > - .dtor = _nvkm_pmu_dtor, > + .dtor = gk20a_pmu_dtor, > .init = gk20a_pmu_init, > .fini = gk20a_pmu_fini, > }, > + .pgob = gk20a_pmu_pgob, > }.base; > + > -- > 2.3.5 > > _______________________________________________ > Nouveau mailing list > Nouveau@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/nouveau -- To unsubscribe from this list: send the line "unsubscribe linux-tegra" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Ben, On Fri, May 1, 2015 at 8:01 AM, Ben Skeggs <skeggsb@gmail.com> wrote: > On 13 April 2015 at 20:42, Alexandre Courbot <acourbot@nvidia.com> wrote: >> Ben, I guess our main remaining concern with this patch is how it should >> integrate wrt. the existing PMU code. Since it is designed to interact with >> the NVIDIA firmware, maybe we should use a different base code, or do you >> think we can somehow share code and data structures? > Hey Alexandre, > > Sorry for the delay in responding to this. It is my turn to apologize - I was (well still am, technically :)) on holidays and have just started unpiling my inbox... > > My original thinking with transitioning to use NVIDIA's firmware was > that I'd modify our firmware interfaces to match yours, and share the > code. I haven't started on any of this yet due to not having any word > on how you guys will be shipping the images, etc. It would be nice to > have some communication on these things :) Indeed. For the first time with Maxwell GPUs, NVIDIA-provided firmware will be required for GPUs to operate properly. This raises several questions: - Should the firmware be released under /lib/firmware/nouveau or /lib/firmware/nvidia ? (this directory already exists for Tegra USB firmware and makes more sense to me, since the firmware is not Nouveau-specific) - For GPCCS/FECS firmware, should we release the netlist "pack" file or adopt the same format as Nouveau does? (1 file per firmware) - Should we keep the current files names (e.g. nvxx_fucxxxx[cd]), or try to switch to more meaningful ones? - What about signature files that are required for secure boot? - Knowing that NVIDIA's firmware ABI is a (very slowly) moving target, it is worth to aim at ABI compatibility, or should we assume different paths for Nouveau and NVIDIA firmware? If ABI incompatibilities are introduced in the way, how do we handle versioning? 
All these issues make me tend towards having a separate handling of NVIDIA-released firmware (location, format, and ABI). It will also make the firmware easier to release if conversions are not necessary on the way out. What are your thoughts on this? > I'm suspecting you won't be wanting to modify our falcon assembly, so > I guess I'll set aside some time to use this patch as a base and > transition our ucode to boot using it? Then you guys can build more > stuff on top of that. I'm also happy to let you modify our ucode if > you wish :) There may be legal issues with us touching the Nouveau firmware. But as I stated above, the first question is do we want to bother with this compatibility at all? -- To unsubscribe from this list: send the line "unsubscribe linux-tegra" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 12 May 2015 at 19:04, Alexandre Courbot <gnurou@gmail.com> wrote: > Hi Ben, > > On Fri, May 1, 2015 at 8:01 AM, Ben Skeggs <skeggsb@gmail.com> wrote: >> On 13 April 2015 at 20:42, Alexandre Courbot <acourbot@nvidia.com> wrote: >>> Ben, I guess our main remaining concern with this patch is how it should >>> integrate wrt. the existing PMU code. Since it is designed to interact with >>> the NVIDIA firmware, maybe we should use a different base code, or do you >>> think we can somehow share code and data structures? >> Hey Alexandre, >> >> Sorry for the delay in responding to this. > > It is my turn to apologize - I was (well still am, technically :)) on > holidays and have just started unpiling my inbox... > >> >> My original thinking with transitioning to use NVIDIA's firmware was >> that I'd modify our firmware interfaces to match yours, and share the >> code. I haven't started on any of this yet due to not having any word >> on how you guys will be shipping the images, etc. It would be nice to >> have some communication on these things :) > > Indeed. For the first time with Maxwell GPUs, NVIDIA-provided firmware > will be required for GPUs to operate properly. This raises several > questions: > > - Should the firmware be released under /lib/firmware/nouveau or > /lib/firmware/nvidia ? (this directory already exists for Tegra USB > firmware and makes more sense to me, since the firmware is not > Nouveau-specific) I think /lib/firmware/nvidia makes sense here too. > - For GPCCS/FECS firmware, should we release the netlist "pack" file > or adopt the same format as Nouveau does? (1 file per firmware) > - Should we keep the current files names (e.g. nvxx_fucxxxx[cd]), or > try to switch to more meaningful ones? I'd actually prefer to have the entire netlists bundled, that gives us updated reg/ctx production values too as you guys tweak/update them for hw bugs (etc). 
They're also nicer in that you get a single bundle of everything that's required for that chipset+engine. I don't have too much opinion on naming. The current model of nv{CHIPSET}_{UCODE_TYPE}{REGISTER_BASE}[CODE,DATA] was just nice and convenient to snprintf into a buffer :) > - What about signature files that are required for secure boot? As with above, if it's possible to ship them in a single file with the ucode that it belongs to, that'd be ideal. It's not a huge deal though. > - Knowing that NVIDIA's firmware ABI is a (very slowly) moving target, > it is worth to aim at ABI compatibility, or should we assume different > paths for Nouveau and NVIDIA firmware? If ABI incompatibilities are > introduced in the way, how do we handle versioning? For incompatible changes, I think appending a -VERSION to each firmware blob is probably the simplest approach. The driver can select the necessary codepath based on what it finds (probably trying newer versions first, obviously). > > All these issues make me tend towards having a separate handling of > NVIDIA-released firmware (location, format, and ABI). It will also > make the firmware easier to release if conversions are not necessary > on the way out. What are your thoughts on this? I'm not entirely set here, either way. I somewhat think that initially I should adapt our interfaces to match, and a keep single code path as long as we can. A lot of the changes so far have seemed minor enough we could stick conditionals on firmware version, and if fw changes come along that warrant a radical change in handling from the host, we can abstract it away then. But, I'm open to other suggestions :) Ben. > >> I'm suspecting you won't be wanting to modify our falcon assembly, so >> I guess I'll set aside some time to use this patch as a base and >> transition our ucode to boot using it? Then you guys can build more >> stuff on top of that. 
I'm also happy to let you modify our ucode if >> you wish :) > > There may be legal issues with us touching the Nouveau firmware. But > as I stated above, the first question is do we want to bother with > this compatiblity at all? -- To unsubscribe from this list: send the line "unsubscribe linux-tegra" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, May 15, 2015 at 2:37 PM, Ben Skeggs <skeggsb@gmail.com> wrote: > On 12 May 2015 at 19:04, Alexandre Courbot <gnurou@gmail.com> wrote: >> Hi Ben, >> >> On Fri, May 1, 2015 at 8:01 AM, Ben Skeggs <skeggsb@gmail.com> wrote: >>> On 13 April 2015 at 20:42, Alexandre Courbot <acourbot@nvidia.com> wrote: >>>> Ben, I guess our main remaining concern with this patch is how it should >>>> integrate wrt. the existing PMU code. Since it is designed to interact with >>>> the NVIDIA firmware, maybe we should use a different base code, or do you >>>> think we can somehow share code and data structures? >>> Hey Alexandre, >>> >>> Sorry for the delay in responding to this. >> >> It is my turn to apologize - I was (well still am, technically :)) on >> holidays and have just started unpiling my inbox... >> >>> >>> My original thinking with transitioning to use NVIDIA's firmware was >>> that I'd modify our firmware interfaces to match yours, and share the >>> code. I haven't started on any of this yet due to not having any word >>> on how you guys will be shipping the images, etc. It would be nice to >>> have some communication on these things :) >> >> Indeed. For the first time with Maxwell GPUs, NVIDIA-provided firmware >> will be required for GPUs to operate properly. This raises several >> questions: >> >> - Should the firmware be released under /lib/firmware/nouveau or >> /lib/firmware/nvidia ? (this directory already exists for Tegra USB >> firmware and makes more sense to me, since the firmware is not >> Nouveau-specific) > I think /lib/firmware/nvidia makes sense here too. > >> - For GPCCS/FECS firmware, should we release the netlist "pack" file >> or adopt the same format as Nouveau does? (1 file per firmware) >> - Should we keep the current files names (e.g. nvxx_fucxxxx[cd]), or >> try to switch to more meaningful ones? 
> I'd actually prefer to have the entire netlists bundled, that gives us > updated reg/ctx production values too as you guys tweak/update them > for hw bugs (etc). They're also nicer in that you get a single bundle > of everything that's required for that chipset+engine. Good for me - actually that's the solution I implemented first before deciding to go "à la Nouveau". ;) Some extra code will be needed, but nothing crazy, and we will limit that feature to these chips for which NVIDIA officially provides the firmware. > I don't have too much opinion on naming. The current model of > nv{CHIPSET}_{UCODE_TYPE}{REGISTER_BASE}[CODE,DATA] was just nice and > convenient to snprintf into a buffer :) Since the firmware will be provided by us, shall we store it into nvidia/<chip>/gpcfe.bin on linux-firmware? >> - What about signature files that are required for secure boot? > As with above, if it's possible to ship them in a single file with the > ucode that it belongs to, that'd be ideal. It's not a huge deal > though. So here we actually have several files - it is kind of a mess actually. However I could probably merge them into a single netlist file like we did for the GPC/FE CS code. >> - Knowing that NVIDIA's firmware ABI is a (very slowly) moving target, >> it is worth to aim at ABI compatibility, or should we assume different >> paths for Nouveau and NVIDIA firmware? If ABI incompatibilities are >> introduced in the way, how do we handle versioning? > For incompatible changes, I think appending a -VERSION to each > firmware blob is probably the simplest approach. The driver can > select the necessary codepath based on what it finds (probably trying > newer versions first, obviously). I agree. Hopefully we won't have too much of this. >> All these issues make me tend towards having a separate handling of >> NVIDIA-released firmware (location, format, and ABI). It will also >> make the firmware easier to release if conversions are not necessary >> on the way out. 
What are your thoughts on this? > I'm not entirely set here, either way. I somewhat think that > initially I should adapt our interfaces to match, and a keep single > code path as long as we can. A lot of the changes so far have seemed > minor enough we could stick conditionals on firmware version, and if > fw changes come along that warrant a radical change in handling from > the host, we can abstract it away then. > > But, I'm open to other suggestions :) Right now my main concern is to get secure boot support in, and the shortest path seems to be to not care about ABI compatibility between Nouveau and NV firmwares (at least for PMU). Basically I am hoping that you will agree to proceed this way as a first step. ^_^ -- To unsubscribe from this list: send the line "unsubscribe linux-tegra" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 21 May 2015 at 16:03, Alexandre Courbot <gnurou@gmail.com> wrote: > On Fri, May 15, 2015 at 2:37 PM, Ben Skeggs <skeggsb@gmail.com> wrote: >> On 12 May 2015 at 19:04, Alexandre Courbot <gnurou@gmail.com> wrote: >>> Hi Ben, >>> >>> On Fri, May 1, 2015 at 8:01 AM, Ben Skeggs <skeggsb@gmail.com> wrote: >>>> On 13 April 2015 at 20:42, Alexandre Courbot <acourbot@nvidia.com> wrote: >>>>> Ben, I guess our main remaining concern with this patch is how it should >>>>> integrate wrt. the existing PMU code. Since it is designed to interact with >>>>> the NVIDIA firmware, maybe we should use a different base code, or do you >>>>> think we can somehow share code and data structures? >>>> Hey Alexandre, >>>> >>>> Sorry for the delay in responding to this. >>> >>> It is my turn to apologize - I was (well still am, technically :)) on >>> holidays and have just started unpiling my inbox... >>> >>>> >>>> My original thinking with transitioning to use NVIDIA's firmware was >>>> that I'd modify our firmware interfaces to match yours, and share the >>>> code. I haven't started on any of this yet due to not having any word >>>> on how you guys will be shipping the images, etc. It would be nice to >>>> have some communication on these things :) >>> >>> Indeed. For the first time with Maxwell GPUs, NVIDIA-provided firmware >>> will be required for GPUs to operate properly. This raises several >>> questions: >>> >>> - Should the firmware be released under /lib/firmware/nouveau or >>> /lib/firmware/nvidia ? (this directory already exists for Tegra USB >>> firmware and makes more sense to me, since the firmware is not >>> Nouveau-specific) >> I think /lib/firmware/nvidia makes sense here too. >> >>> - For GPCCS/FECS firmware, should we release the netlist "pack" file >>> or adopt the same format as Nouveau does? (1 file per firmware) >>> - Should we keep the current files names (e.g. nvxx_fucxxxx[cd]), or >>> try to switch to more meaningful ones? 
>> I'd actually prefer to have the entire netlists bundled, that gives us >> updated reg/ctx production values too as you guys tweak/update them >> for hw bugs (etc). They're also nicer in that you get a single bundle >> of everything that's required for that chipset+engine. > > Good for me - actually that's the solution I implemented first before > deciding to go "à la Nouveau". ;) Some extra code will be needed, but > nothing crazy, and we will limit that feature to these chips for which > NVIDIA officially provides the firmware. > >> I don't have too much opinion on naming. The current model of >> nv{CHIPSET}_{UCODE_TYPE}{REGISTER_BASE}[CODE,DATA] was just nice and >> convenient to snprintf into a buffer :) > > Since the firmware will be provided by us, shall we store it into > nvidia/<chip>/gpcfe.bin on linux-firmware? Sounds fine. > >>> - What about signature files that are required for secure boot? >> As with above, if it's possible to ship them in a single file with the >> ucode that it belongs to, that'd be ideal. It's not a huge deal >> though. > > So here we actually have several files - it is kind of a mess > actually. However I could probably merge them into a single netlist > file like we did for the GPC/FE CS code. If that's possible, that'd be great. > >>> - Knowing that NVIDIA's firmware ABI is a (very slowly) moving target, >>> it is worth to aim at ABI compatibility, or should we assume different >>> paths for Nouveau and NVIDIA firmware? If ABI incompatibilities are >>> introduced in the way, how do we handle versioning? >> For incompatible changes, I think appending a -VERSION to each >> firmware blob is probably the simplest approach. The driver can >> select the necessary codepath based on what it finds (probably trying >> newer versions first, obviously). > > I agree. Hopefully we won't have too much of this. > >>> All these issues make me tend towards having a separate handling of >>> NVIDIA-released firmware (location, format, and ABI). 
It will also >>> make the firmware easier to release if conversions are not necessary >>> on the way out. What are your thoughts on this? >> I'm not entirely set here, either way. I somewhat think that >> initially I should adapt our interfaces to match, and a keep single >> code path as long as we can. A lot of the changes so far have seemed >> minor enough we could stick conditionals on firmware version, and if >> fw changes come along that warrant a radical change in handling from >> the host, we can abstract it away then. >> >> But, I'm open to other suggestions :) > > Right now my main concern is to get secure boot support in, and the > shortest path seems to be to not care about ABI compatibility between > Nouveau and NV firmwares (at least for PMU). Basically I am hoping > that you will agree to proceed this way in a first time. ^_^ If it's really that urgent, my suggestion is to submit the patches as you did previously, keeping them confined to GK20A/GM20B, and I'll do something nicer to support boards in general. Frankly, I'd have had secure boot support in the driver already had NVIDIA played things a little differently... Going forward, I'd like to see NVIDIA not treating the Tegra parts of nouveau as a little black box where anything goes and things are different from the rest of the driver. The chips operate, for the most part, identically to their big siblings... Thanks, Ben. -- To unsubscribe from this list: send the line "unsubscribe linux-tegra" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 21 May 2015 at 18:46, Ben Skeggs <skeggsb@gmail.com> wrote: > On 21 May 2015 at 16:03, Alexandre Courbot <gnurou@gmail.com> wrote: >> On Fri, May 15, 2015 at 2:37 PM, Ben Skeggs <skeggsb@gmail.com> wrote: >>> On 12 May 2015 at 19:04, Alexandre Courbot <gnurou@gmail.com> wrote: >>>> Hi Ben, >>>> >>>> On Fri, May 1, 2015 at 8:01 AM, Ben Skeggs <skeggsb@gmail.com> wrote: >>>>> On 13 April 2015 at 20:42, Alexandre Courbot <acourbot@nvidia.com> wrote: >>>>>> Ben, I guess our main remaining concern with this patch is how it should >>>>>> integrate wrt. the existing PMU code. Since it is designed to interact with >>>>>> the NVIDIA firmware, maybe we should use a different base code, or do you >>>>>> think we can somehow share code and data structures? >>>>> Hey Alexandre, >>>>> >>>>> Sorry for the delay in responding to this. >>>> >>>> It is my turn to apologize - I was (well still am, technically :)) on >>>> holidays and have just started unpiling my inbox... >>>> >>>>> >>>>> My original thinking with transitioning to use NVIDIA's firmware was >>>>> that I'd modify our firmware interfaces to match yours, and share the >>>>> code. I haven't started on any of this yet due to not having any word >>>>> on how you guys will be shipping the images, etc. It would be nice to >>>>> have some communication on these things :) >>>> >>>> Indeed. For the first time with Maxwell GPUs, NVIDIA-provided firmware >>>> will be required for GPUs to operate properly. This raises several >>>> questions: >>>> >>>> - Should the firmware be released under /lib/firmware/nouveau or >>>> /lib/firmware/nvidia ? (this directory already exists for Tegra USB >>>> firmware and makes more sense to me, since the firmware is not >>>> Nouveau-specific) >>> I think /lib/firmware/nvidia makes sense here too. >>> >>>> - For GPCCS/FECS firmware, should we release the netlist "pack" file >>>> or adopt the same format as Nouveau does? (1 file per firmware) >>>> - Should we keep the current files names (e.g. 
nvxx_fucxxxx[cd]), or >>>> try to switch to more meaningful ones? >>> I'd actually prefer to have the entire netlists bundled, that gives us >>> updated reg/ctx production values too as you guys tweak/update them >>> for hw bugs (etc). They're also nicer in that you get a single bundle >>> of everything that's required for that chipset+engine. >> >> Good for me - actually that's the solution I implemented first before >> deciding to go "à la Nouveau". ;) Some extra code will be needed, but >> nothing crazy, and we will limit that feature to these chips for which >> NVIDIA officially provides the firmware. >> >>> I don't have too much opinion on naming. The current model of >>> nv{CHIPSET}_{UCODE_TYPE}{REGISTER_BASE}[CODE,DATA] was just nice and >>> convenient to snprintf into a buffer :) >> >> Since the firmware will be provided by us, shall we store it into >> nvidia/<chip>/gpcfe.bin on linux-firmware? > Sounds fine. > >> >>>> - What about signature files that are required for secure boot? >>> As with above, if it's possible to ship them in a single file with the >>> ucode that it belongs to, that'd be ideal. It's not a huge deal >>> though. >> >> So here we actually have several files - it is kind of a mess >> actually. However I could probably merge them into a single netlist >> file like we did for the GPC/FE CS code. > If that's possible, that'd be great. > >> >>>> - Knowing that NVIDIA's firmware ABI is a (very slowly) moving target, >>>> it is worth to aim at ABI compatibility, or should we assume different >>>> paths for Nouveau and NVIDIA firmware? If ABI incompatibilities are >>>> introduced in the way, how do we handle versioning? >>> For incompatible changes, I think appending a -VERSION to each >>> firmware blob is probably the simplest approach. The driver can >>> select the necessary codepath based on what it finds (probably trying >>> newer versions first, obviously). >> >> I agree. Hopefully we won't have too much of this. 
>> >>>> All these issues make me tend towards having a separate handling of >>>> NVIDIA-released firmware (location, format, and ABI). It will also >>>> make the firmware easier to release if conversions are not necessary >>>> on the way out. What are your thoughts on this? >>> I'm not entirely set here, either way. I somewhat think that >>> initially I should adapt our interfaces to match, and a keep single >>> code path as long as we can. A lot of the changes so far have seemed >>> minor enough we could stick conditionals on firmware version, and if >>> fw changes come along that warrant a radical change in handling from >>> the host, we can abstract it away then. >>> >>> But, I'm open to other suggestions :) >> >> Right now my main concern is to get secure boot support in, and the >> shortest path seems to be to not care about ABI compatibility between >> Nouveau and NV firmwares (at least for PMU). Basically I am hoping >> that you will agree to proceed this way in a first time. ^_^ > If it's really that urgent, my suggestion is to submit the patches as > you did previously, keeping them confined to GK20A/GM20B, and I'll do > something nicer to support boards in general. > > Frankly, I'd have had secure boot support in the driver already had > NVIDIA played things a little differently... Going forward, I'd like > to see NVIDIA not treating the Tegra parts of nouveau as a little > black box where anything goes and things are different from the rest > of the driver. Elaborating a bit on that, as it seems unclear reading it back. It's open-source, don't be afraid of proposing/making changes to things where you see it's needed, even if it does touch the scary desktop GPUs :) Ben. > The chips operate, for the most part, identically to > their big siblings... > > Thanks, > Ben. -- To unsubscribe from this list: send the line "unsubscribe linux-tegra" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
>>>> - What about signature files that are required for secure boot? >>> As with above, if it's possible to ship them in a single file with the >>> ucode that it belongs to, that'd be ideal. It's not a huge deal >>> though. >> >> So here we actually have several files - it is kind of a mess >> actually. However I could probably merge them into a single netlist >> file like we did for the GPC/FE CS code. > If that's possible, that'd be great. I am still trying to figure out internally what makes the most sense, but this is one of the open options. Another is to use another packaging format that is already supported by upstream, or just to lay all firmware binaries as-is, one file per firmware (hopefully properly organized in nvidia/<chip>/). It's quite impressive how much bikeshedding we can do on simple questions. :) Are you aware of a "best practice" for upstream firmwares? >> Right now my main concern is to get secure boot support in, and the >> shortest path seems to be to not care about ABI compatibility between >> Nouveau and NV firmwares (at least for PMU). Basically I am hoping >> that you will agree to proceed this way in a first time. ^_^ > If it's really that urgent, my suggestion is to submit the patches as > you did previously, keeping them confined to GK20A/GM20B, and I'll do > something nicer to support boards in general. > > Frankly, I'd have had secure boot support in the driver already had > NVIDIA played things a little differently... Going forward, I'd like > to see NVIDIA not treating the Tegra parts of nouveau as a little > black box where anything goes and things are different from the rest > of the driver. The chips operate, for the most part, identically to > their big siblings... I agree that's not optimal, and I apologize for that. The main reason is that Tegra and dGPU are working mostly separately within NVIDIA, despite sharing the same driver. In this case I agree it does not make much sense, as this code is not Tegra-specific. 
But on the other hand, we need to start with a chip. It just happens that for secure boot we are starting with Tegra, but this work should be leverageable for dGPUs and I don't expect it to take too much effort. Of course if we were good citizens we would do it ourselves while we are at it. :/ -- To unsubscribe from this list: send the line "unsubscribe linux-tegra" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drm/nouveau/nvkm/subdev/pmu/gk20a.c b/drm/nouveau/nvkm/subdev/pmu/gk20a.c index 594f746e68f2..c206ec5e558a 100644 --- a/drm/nouveau/nvkm/subdev/pmu/gk20a.c +++ b/drm/nouveau/nvkm/subdev/pmu/gk20a.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -19,14 +19,186 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ -#include "priv.h" +#include "priv.h" +#include <core/client.h> +#include <core/gpuobj.h> +#include <subdev/bar.h> +#include <subdev/fb.h> +#include <subdev/mc.h> +#include <subdev/timer.h> +#include <subdev/mmu.h> +#include <subdev/pmu.h> +#include <core/object.h> +#include <core/device.h> +#include <linux/delay.h> +#include <linux/firmware.h> #include <subdev/clk.h> #include <subdev/timer.h> #include <subdev/volt.h> +#define APP_VERSION_GK20A 17997577 +#define GK20A_PMU_UCODE_SIZE_MAX (256 * 1024) +#define PMU_QUEUE_COUNT 5 + +#define GK20A_PMU_TRACE_BUFSIZE 0x4000 /* 4K */ +#define GK20A_PMU_DMEM_BLKSIZE2 8 +#define GK20A_PMU_UCODE_NB_MAX_OVERLAY 32 +#define GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH 64 + +#define PMU_UNIT_REWIND (0x00) +#define PMU_UNIT_PG (0x03) +#define PMU_UNIT_INIT (0x07) +#define PMU_UNIT_PERFMON (0x12) +#define PMU_UNIT_THERM (0x1B) +#define PMU_UNIT_RC (0x1F) +#define PMU_UNIT_NULL (0x20) +#define PMU_UNIT_END (0x23) +#define PMU_UNIT_TEST_START (0xFE) +#define PMU_UNIT_END_SIM (0xFF) +#define PMU_UNIT_TEST_END (0xFF) + +#define PMU_UNIT_ID_IS_VALID(id) \ + (((id) < PMU_UNIT_END) || ((id) >= PMU_UNIT_TEST_START)) +#define PMU_DMEM_ALIGNMENT (4) + #define BUSY_SLOT 0 #define CLK_SLOT 7 +#define GK20A_PMU_UCODE_IMAGE "gpmu_ucode.bin" + +/*Choices for DMA to use*/ +enum { + GK20A_PMU_DMAIDX_UCODE = 0, + 
GK20A_PMU_DMAIDX_VIRT = 1, + GK20A_PMU_DMAIDX_PHYS_VID = 2, + GK20A_PMU_DMAIDX_PHYS_SYS_COH = 3, + GK20A_PMU_DMAIDX_PHYS_SYS_NCOH = 4, + GK20A_PMU_DMAIDX_RSVD = 5, + GK20A_PMU_DMAIDX_PELPG = 6, + GK20A_PMU_DMAIDX_END = 7 +}; + +struct pmu_buf_desc { + struct nvkm_gpuobj *obj; + struct nvkm_vma vma; + size_t size; +}; + +struct nvkm_pmu_priv_vm { + struct nvkm_gpuobj *mem; + struct nvkm_gpuobj *pgd; + struct nvkm_vm *vm; +}; + +/*Choices for pmu_state*/ +enum { + PMU_STATE_OFF, /*0 PMU is off */ + PMU_STATE_STARTING, /*1 PMU is on, but not booted */ + PMU_STATE_INIT_RECEIVED /*2 PMU init message received */ +}; + +struct pmu_mem_gk20a { + u32 dma_base; + u8 dma_offset; + u8 dma_idx; + u16 fb_size; +}; + +struct pmu_cmdline_args_gk20a { + u32 cpu_freq_hz; /* Frequency of the clock driving PMU */ + u32 falc_trace_size; /* falctrace buffer size (bytes) */ + u32 falc_trace_dma_base; /* 256-byte block address */ + u32 falc_trace_dma_idx; /* dmaIdx for DMA operations */ + u8 secure_mode; + struct pmu_mem_gk20a gc6_ctx; /* dmem offset of gc6 context */ +}; + +/*pmu ucode descriptor*/ +struct pmu_ucode_desc { + u32 descriptor_size; + u32 image_size; + u32 tools_version; + u32 app_version; + char date[GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH]; + u32 bootloader_start_offset; + u32 bootloader_size; + u32 bootloader_imem_offset; + u32 bootloader_entry_point; + u32 app_start_offset; + u32 app_size; + u32 app_imem_offset; + u32 app_imem_entry; + u32 app_dmem_offset; + u32 app_resident_code_offset; + u32 app_resident_code_size; + u32 app_resident_data_offset; + u32 app_resident_data_size; + u32 nb_overlays; + struct {u32 start; u32 size; } load_ovl[GK20A_PMU_UCODE_NB_MAX_OVERLAY]; + u32 compressed; +}; + +/*pmu msg header*/ +struct pmu_hdr { + u8 unit_id; + u8 size; + u8 ctrl_flags; + u8 seq_id; +}; + +#define PMU_MSG_HDR_SIZE sizeof(struct pmu_hdr) + +enum { + PMU_INIT_MSG_TYPE_PMU_INIT = 0, +}; + +/*pmu init msg format*/ +struct pmu_init_msg_pmu_gk20a { + u8 msg_type; + u8 pad; + u16 
os_debug_entry_point; + + struct { + u16 size; + u16 offset; + u8 index; + u8 pad; + } queue_info[PMU_QUEUE_COUNT]; + + u16 sw_managed_area_offset; + u16 sw_managed_area_size; +}; + +/*pmu init msg format*/ +struct pmu_init_msg { + union { + u8 msg_type; + struct pmu_init_msg_pmu_gk20a pmu_init_gk20a; + }; +}; + +enum { + PMU_RC_MSG_TYPE_UNHANDLED_CMD = 0, +}; + +struct pmu_rc_msg_unhandled_cmd { + u8 msg_type; + u8 unit_id; +}; + +struct pmu_rc_msg { + u8 msg_type; + struct pmu_rc_msg_unhandled_cmd unhandled_cmd; +}; + +/*pmu generic msg format*/ +struct pmu_msg { + struct pmu_hdr hdr; + union { + struct pmu_init_msg init; + struct pmu_rc_msg rc; + } msg; +}; struct gk20a_pmu_dvfs_data { int p_load_target; @@ -39,8 +211,19 @@ struct gk20a_pmu_priv { struct nvkm_pmu base; struct nvkm_alarm alarm; struct gk20a_pmu_dvfs_data *data; + struct pmu_ucode_desc *desc; + struct pmu_buf_desc ucode; + struct pmu_buf_desc trace_buf; + struct mutex pmu_copy_lock; + bool pmu_ready; + int pmu_state; + struct nvkm_pmu_priv_vm pmuvm; + struct mutex isr_mutex; + bool isr_enabled; }; +#define to_gk20a_priv(ptr) container_of(ptr, struct gk20a_pmu_priv, base) + struct gk20a_pmu_dvfs_dev_status { unsigned long total; unsigned long busy; @@ -48,6 +231,59 @@ struct gk20a_pmu_dvfs_dev_status { }; static int +gk20a_pmu_load_firmware(struct nvkm_pmu *pmu, const struct firmware **pfw) +{ + struct nvkm_device *dev; + char fw[32]; + + dev = nv_device(pmu); + snprintf(fw, sizeof(fw), "nvidia/tegra124/%s", GK20A_PMU_UCODE_IMAGE); + return request_firmware(pfw, fw, nv_device_base(dev)); +} + +static void +gk20a_pmu_release_firmware(struct nvkm_pmu *pmu, const struct firmware *pfw) +{ + nv_debug(pmu, "firmware released\n"); + release_firmware(pfw); +} + +static void +gk20a_pmu_dump_firmware_info(struct nvkm_pmu *pmu, const struct firmware *fw) +{ + struct pmu_ucode_desc *desc = (struct pmu_ucode_desc *)fw->data; + + nv_debug(pmu, "GK20A PMU firmware information\n"); + nv_debug(pmu, "descriptor size 
= %u\n", desc->descriptor_size); + nv_debug(pmu, "image size = %u\n", desc->image_size); + nv_debug(pmu, "app_version = 0x%08x\n", desc->app_version); + nv_debug(pmu, "date = %s\n", desc->date); + nv_debug(pmu, "bootloader_start_offset = 0x%08x\n", + desc->bootloader_start_offset); + nv_debug(pmu, "bootloader_size = 0x%08x\n", desc->bootloader_size); + nv_debug(pmu, "bootloader_imem_offset = 0x%08x\n", + desc->bootloader_imem_offset); + nv_debug(pmu, "bootloader_entry_point = 0x%08x\n", + desc->bootloader_entry_point); + nv_debug(pmu, "app_start_offset = 0x%08x\n", desc->app_start_offset); + nv_debug(pmu, "app_size = 0x%08x\n", desc->app_size); + nv_debug(pmu, "app_imem_offset = 0x%08x\n", desc->app_imem_offset); + nv_debug(pmu, "app_imem_entry = 0x%08x\n", desc->app_imem_entry); + nv_debug(pmu, "app_dmem_offset = 0x%08x\n", desc->app_dmem_offset); + nv_debug(pmu, "app_resident_code_offset = 0x%08x\n", + desc->app_resident_code_offset); + nv_debug(pmu, "app_resident_code_size = 0x%08x\n", + desc->app_resident_code_size); + nv_debug(pmu, "app_resident_data_offset = 0x%08x\n", + desc->app_resident_data_offset); + nv_debug(pmu, "app_resident_data_size = 0x%08x\n", + desc->app_resident_data_size); + nv_debug(pmu, "nb_overlays = %d\n", desc->nb_overlays); + + nv_debug(pmu, "compressed = %u\n", desc->compressed); +} + +static int gk20a_pmu_dvfs_target(struct gk20a_pmu_priv *priv, int *state) { struct nvkm_clk *clk = nvkm_clk(priv); @@ -160,40 +396,529 @@ resched: } static int -gk20a_pmu_fini(struct nvkm_object *object, bool suspend) +gk20a_pmu_enable_hw(struct gk20a_pmu_priv *priv, struct nvkm_mc *pmc, bool enable) { - struct nvkm_pmu *pmu = (void *)object; - struct gk20a_pmu_priv *priv = (void *)pmu; + if (enable) { + nv_mask(pmc, 0x000200, 0x00002000, 0x00002000); + nv_rd32(pmc, 0x00000200); + if (nv_wait(priv, 0x0010a10c, 0x00000006, 0x00000000)) + return 0; + nv_mask(pmc, 0x00000200, 0x2000, 0x00000000); + nv_error(priv, "Falcon mem scrubbing timeout\n"); + return 
-ETIMEDOUT; + } else { + nv_mask(pmc, 0x00000200, 0x2000, 0x00000000); + return 0; + } +} +static void +gk20a_pmu_enable_irq(struct gk20a_pmu_priv *priv, struct nvkm_mc *pmc, bool enable) +{ + if (enable) { + nv_debug(priv, "enable pmu irq\n"); + nv_wr32(priv, 0x0010a010, 0xff); + nv_mask(pmc, 0x00000640, 0x1000000, 0x1000000); + nv_mask(pmc, 0x00000644, 0x1000000, 0x1000000); + } else { + nv_debug(priv, "disable pmu irq\n"); + nv_mask(pmc, 0x00000640, 0x1000000, 0x00000000); + nv_mask(pmc, 0x00000644, 0x1000000, 0x00000000); + nv_wr32(priv, 0x0010a014, 0xff); + } - nvkm_timer_alarm_cancel(priv, &priv->alarm); +} - return nvkm_subdev_fini(&pmu->base, suspend); +static int +gk20a_pmu_idle(struct gk20a_pmu_priv *priv) +{ + if (!nv_wait(priv, 0x0010a04c, 0x0000ffff, 0x00000000)) { + nv_error(priv, "timeout waiting pmu idle\n"); + return -EBUSY; + } + + return 0; +} + +static int +gk20a_pmu_enable(struct gk20a_pmu_priv *priv, struct nvkm_mc *pmc, bool enable) +{ + u32 pmc_enable; + int err; + + if (enable) { + err = gk20a_pmu_enable_hw(priv, pmc, true); + if (err) + return err; + + err = gk20a_pmu_idle(priv); + if (err) + return err; + + gk20a_pmu_enable_irq(priv, pmc, true); + } else { + pmc_enable = nv_rd32(pmc, 0x200); + if ((pmc_enable & 0x2000) != 0x0) { + gk20a_pmu_enable_irq(priv, pmc, false); + gk20a_pmu_enable_hw(priv, pmc, false); + } + } + + return 0; +} + +static void +gk20a_pmu_copy_to_dmem(struct gk20a_pmu_priv *priv, u32 dst, u8 *src, u32 size, + u8 port) +{ + u32 i, words, bytes; + u32 data, addr_mask; + u32 *src_u32 = (u32 *)src; + + if (size == 0) { + nv_error(priv, "size is zero\n"); + goto out; + } + + if (dst & 0x3) { + nv_error(priv, "dst (0x%08x) not 4-byte aligned\n", dst); + goto out; + } + + mutex_lock(&priv->pmu_copy_lock); + words = size >> 2; + bytes = size & 0x3; + addr_mask = 0xfffc; + dst &= addr_mask; + + nv_wr32(priv, (0x10a1c0 + (port * 8)), (dst | (0x1 << 24))); + + for (i = 0; i < words; i++) { + nv_wr32(priv, (0x10a1c4 + (port * 
8)), src_u32[i]); + nv_debug(priv, "0x%08x\n", src_u32[i]); + } + + if (bytes > 0) { + data = 0; + for (i = 0; i < bytes; i++) + ((u8 *)&data)[i] = src[(words << 2) + i]; + nv_wr32(priv, (0x10a1c4 + (port * 8)), data); + nv_debug(priv, "0x%08x\n", data); + } + + data = nv_rd32(priv, (0x10a1c0 + (port * 8))) & addr_mask; + size = ALIGN(size, 4); + if (data != dst + size) { + nv_error(priv, "copy failed.... bytes written %d, expected %d", + data - dst, size); + } + mutex_unlock(&priv->pmu_copy_lock); +out: + nv_debug(priv, "exit %s\n", __func__); +} + +static void +gk20a_copy_from_dmem(struct gk20a_pmu_priv *priv, u32 src, u8 *dst, u32 size, + u8 port) +{ + u32 i, words, bytes; + u32 data, addr_mask; + u32 *dst_u32 = (u32 *)dst; + + if (size == 0) { + nv_error(priv, "size is zero\n"); + goto out; + } + + if (src & 0x3) { + nv_error(priv, "src (0x%08x) not 4-byte aligned\n", src); + goto out; + } + + mutex_lock(&priv->pmu_copy_lock); + + words = size >> 2; + bytes = size & 0x3; + + addr_mask = 0xfffc; + + src &= addr_mask; + + nv_wr32(priv, (0x10a1c0 + (port * 8)), (src | (0x1 << 25))); + + for (i = 0; i < words; i++) { + dst_u32[i] = nv_rd32(priv, (0x0010a1c4 + port * 8)); + nv_debug(priv, "0x%08x\n", dst_u32[i]); + } + if (bytes > 0) { + data = nv_rd32(priv, (0x0010a1c4 + port * 8)); + nv_debug(priv, "0x%08x\n", data); + + for (i = 0; i < bytes; i++) + dst[(words << 2) + i] = ((u8 *)&data)[i]; + } + mutex_unlock(&priv->pmu_copy_lock); +out: + nv_debug(priv, "exit %s\n", __func__); +} + +static int +gk20a_pmu_process_init_msg(struct gk20a_pmu_priv *priv, struct pmu_msg *msg) +{ + struct pmu_init_msg_pmu_gk20a *init; + u32 tail; + + tail = nv_rd32(priv, 0x0010a4cc); + + gk20a_copy_from_dmem(priv, tail, (u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0); + + if (msg->hdr.unit_id != PMU_UNIT_INIT) { + nv_error(priv, "expecting init msg\n"); + return -EINVAL; + } + + gk20a_copy_from_dmem(priv, tail + PMU_MSG_HDR_SIZE, + (u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0); + + if 
(msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) { + nv_error(priv, "expecting init msg\n"); + return -EINVAL; + } + + tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT); + nv_wr32(priv, 0x0010a4cc, tail); + init = &msg->msg.init.pmu_init_gk20a; + priv->pmu_ready = true; + priv->pmu_state = PMU_STATE_INIT_RECEIVED; + nv_debug(priv, "init msg processed\n"); + return 0; +} + +static void +gk20a_pmu_process_message(struct work_struct *work) +{ + struct nvkm_pmu *pmu = container_of(work, struct nvkm_pmu, recv.work); + struct gk20a_pmu_priv *priv = to_gk20a_priv(pmu); + struct pmu_msg msg; + struct nvkm_mc *pmc = nvkm_mc(pmu); + + mutex_lock(&priv->isr_mutex); + if (unlikely(!priv->pmu_ready)) { + nv_debug(pmu, "processing init msg\n"); + gk20a_pmu_process_init_msg(priv, &msg); + mutex_unlock(&priv->isr_mutex); + gk20a_pmu_enable_irq(priv, pmc, true); + } else { + mutex_unlock(&priv->isr_mutex); + } +} + +static int +gk20a_pmu_init_vm(struct gk20a_pmu_priv *priv, const struct firmware *fw) +{ + int ret = 0; + u32 *ucode_image; + struct pmu_ucode_desc *desc = (struct pmu_ucode_desc *)fw->data; + int i; + struct nvkm_pmu_priv_vm *pmuvm = &priv->pmuvm; + struct nvkm_device *device = nv_device(&priv->base); + struct nvkm_vm *vm; + const u64 pmu_area_len = 300*1024; + + /* mem for inst blk*/ + ret = nvkm_gpuobj_new(nv_object(priv), NULL, 0x1000, 0, 0, &pmuvm->mem); + if (ret) + return ret; + + /* mem for pgd*/ + ret = nvkm_gpuobj_new(nv_object(priv), NULL, 0x8000, 0, 0, &pmuvm->pgd); + if (ret) + return ret; + + /*allocate virtual memory range*/ + ret = nvkm_vm_new(device, 0, pmu_area_len, 0, &vm); + if (ret) + return ret; + + atomic_inc(&vm->engref[NVDEV_SUBDEV_PMU]); + + /* update VM with pgd */ + ret = nvkm_vm_ref(vm, &pmuvm->vm, pmuvm->pgd); + if (ret) + return ret; + + /*update pgd in inst blk */ + nv_wo32(pmuvm->mem, 0x0200, lower_32_bits(pmuvm->pgd->addr)); + nv_wo32(pmuvm->mem, 0x0204, upper_32_bits(pmuvm->pgd->addr)); + nv_wo32(pmuvm->mem, 0x0208, 
lower_32_bits(pmu_area_len - 1)); + nv_wo32(pmuvm->mem, 0x020c, upper_32_bits(pmu_area_len - 1)); + + /* allocate memory for pmu fw to be copied to*/ + ret = nvkm_gpuobj_new(nv_object(priv), NULL, GK20A_PMU_UCODE_SIZE_MAX, + 0x1000, 0, &priv->ucode.obj); + if (ret) + return ret; + + ucode_image = (u32 *)((u8 *)desc + desc->descriptor_size); + for (i = 0; i < (desc->app_start_offset + desc->app_size); i += 4) + nv_wo32(priv->ucode.obj, i, ucode_image[i/4]); + + /* map allocated memory into GMMU */ + ret = nvkm_gpuobj_map_vm(priv->ucode.obj, vm, NV_MEM_ACCESS_RW, + &priv->ucode.vma); + if (ret) + return ret; + + return ret; +} + +static int +gk20a_init_pmu_setup_sw(struct gk20a_pmu_priv *priv) +{ + struct nvkm_pmu_priv_vm *pmuvm = &priv->pmuvm; + int ret = 0; + + INIT_WORK(&priv->base.recv.work, gk20a_pmu_process_message); + + ret = nvkm_gpuobj_new(nv_object(priv), NULL, GK20A_PMU_TRACE_BUFSIZE, + 0, 0, &priv->trace_buf.obj); + if (ret) + return ret; + + ret = nvkm_gpuobj_map_vm(nv_gpuobj(priv->trace_buf.obj), pmuvm->vm, + NV_MEM_ACCESS_RW, &priv->trace_buf.vma); + if (ret) + return ret; + + return 0; +} + +static int +gk20a_pmu_bootstrap(struct gk20a_pmu_priv *priv) +{ + struct pmu_ucode_desc *desc = priv->desc; + u32 addr_code, addr_data, addr_load; + u32 i, blocks, addr_args; + struct pmu_cmdline_args_gk20a cmdline_args; + struct nvkm_pmu_priv_vm *pmuvm = &priv->pmuvm; + + nv_mask(priv, 0x0010a048, 0x01, 0x01); + /*bind the address*/ + nv_wr32(priv, 0x0010a480, + pmuvm->mem->addr >> 12 | + 0x1 << 30 | + 0x20000000); + + /* TBD: load all other surfaces */ + cmdline_args.falc_trace_size = GK20A_PMU_TRACE_BUFSIZE; + cmdline_args.falc_trace_dma_base = + lower_32_bits(priv->trace_buf.vma.offset >> 8); + cmdline_args.falc_trace_dma_idx = GK20A_PMU_DMAIDX_VIRT; + cmdline_args.cpu_freq_hz = 204; + cmdline_args.secure_mode = 0; + + addr_args = (nv_rd32(priv, 0x0010a108) >> 9) & 0x1ff; + addr_args = addr_args << GK20A_PMU_DMEM_BLKSIZE2; + addr_args -= sizeof(struct 
pmu_cmdline_args_gk20a); + nv_debug(priv, "initiating copy to dmem\n"); + gk20a_pmu_copy_to_dmem(priv, addr_args, + (u8 *)&cmdline_args, + sizeof(struct pmu_cmdline_args_gk20a), 0); + + nv_wr32(priv, 0x0010a1c0, 0x1 << 24); + + addr_code = lower_32_bits((priv->ucode.vma.offset + + desc->app_start_offset + + desc->app_resident_code_offset) >> 8); + + addr_data = lower_32_bits((priv->ucode.vma.offset + + desc->app_start_offset + + desc->app_resident_data_offset) >> 8); + + addr_load = lower_32_bits((priv->ucode.vma.offset + + desc->bootloader_start_offset) >> 8); + + nv_wr32(priv, 0x0010a1c4, GK20A_PMU_DMAIDX_UCODE); + nv_debug(priv, "0x%08x\n", GK20A_PMU_DMAIDX_UCODE); + nv_wr32(priv, 0x0010a1c4, (addr_code)); + nv_debug(priv, "0x%08x\n", (addr_code)); + nv_wr32(priv, 0x0010a1c4, desc->app_size); + nv_debug(priv, "0x%08x\n", desc->app_size); + nv_wr32(priv, 0x0010a1c4, desc->app_resident_code_size); + nv_debug(priv, "0x%08x\n", desc->app_resident_code_size); + nv_wr32(priv, 0x0010a1c4, desc->app_imem_entry); + nv_debug(priv, "0x%08x\n", desc->app_imem_entry); + nv_wr32(priv, 0x0010a1c4, (addr_data)); + nv_debug(priv, "0x%08x\n", (addr_data)); + nv_wr32(priv, 0x0010a1c4, desc->app_resident_data_size); + nv_debug(priv, "0x%08x\n", desc->app_resident_data_size); + nv_wr32(priv, 0x0010a1c4, (addr_code)); + nv_debug(priv, "0x%08x\n", (addr_code)); + nv_wr32(priv, 0x0010a1c4, 0x1); + nv_debug(priv, "0x%08x\n", 1); + nv_wr32(priv, 0x0010a1c4, addr_args); + nv_debug(priv, "0x%08x\n", addr_args); + + nv_wr32(priv, 0x0010a110, + (addr_load) - (desc->bootloader_imem_offset >> 8)); + + blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8; + + for (i = 0; i < blocks; i++) { + nv_wr32(priv, 0x0010a114, + desc->bootloader_imem_offset + (i << 8)); + nv_wr32(priv, 0x0010a11c, + desc->bootloader_imem_offset + (i << 8)); + nv_wr32(priv, 0x0010a118, + 0x01 << 4 | + 0x06 << 8 | + ((GK20A_PMU_DMAIDX_UCODE & 0x07) << 12)); + } + + nv_wr32(priv, 0x0010a104, 
(desc->bootloader_entry_point)); + nv_wr32(priv, 0x0010a100, 0x1 << 1); + nv_wr32(priv, 0x0010a080, desc->app_version); + + return 0; +} + +static int +gk20a_init_pmu_setup_hw1(struct gk20a_pmu_priv *priv, struct nvkm_mc *pmc) +{ + int err; + + mutex_lock(&priv->isr_mutex); + err = gk20a_pmu_enable(priv, pmc, true); + priv->isr_enabled = (err == 0); + mutex_unlock(&priv->isr_mutex); + if (err) + return err; + + /* setup apertures - virtual */ + nv_wr32(priv, 0x10a600 + 0 * 4, 0x0); + nv_wr32(priv, 0x10a600 + 1 * 4, 0x0); + /* setup apertures - physical */ + nv_wr32(priv, 0x10a600 + 2 * 4, 0x4 | 0x0); + nv_wr32(priv, 0x10a600 + 3 * 4, 0x4 | 0x1); + nv_wr32(priv, 0x10a600 + 4 * 4, 0x4 | 0x2); + + /* TBD: load pmu ucode */ + err = gk20a_pmu_bootstrap(priv); + if (err) + return err; + + return 0; +} + + +static void +gk20a_pmu_intr(struct nvkm_subdev *subdev) +{ + struct gk20a_pmu_priv *priv = to_gk20a_priv(nvkm_pmu(subdev)); + struct nvkm_mc *pmc = nvkm_mc(priv); + u32 intr, mask; + + if (!priv->isr_enabled) + return; + + mask = nv_rd32(priv, 0x0010a018) & nv_rd32(priv, 0x0010a01c); + + intr = nv_rd32(priv, 0x0010a008) & mask; + + nv_debug(priv, "received falcon interrupt: 0x%08x\n", intr); + gk20a_pmu_enable_irq(priv, pmc, false); + + if (!intr || priv->pmu_state == PMU_STATE_OFF) { + nv_wr32(priv, 0x0010a004, intr); + nv_error(priv, "pmu state off\n"); + gk20a_pmu_enable_irq(priv, pmc, true); + } + + if (intr & 0x10) + nv_error(priv, "pmu halt intr not implemented\n"); + + if (intr & 0x20) { + nv_error(priv, "exterr interrupt not impl..Clear interrupt\n"); + nv_mask(priv, 0x0010a16c, (0x1 << 31), 0x00000000); + } + + if (intr & 0x40) { + nv_debug(priv, "scheduling work\n"); + schedule_work(&priv->base.recv.work); + } + + nv_wr32(priv, 0x0010a004, intr); + nv_debug(priv, "irq handled\n"); +} + +static void +gk20a_pmu_pgob(struct nvkm_pmu *pmu, bool enable) +{ } static int gk20a_pmu_init(struct nvkm_object *object) { - struct nvkm_pmu *pmu = (void *)object; - struct 
gk20a_pmu_priv *priv = (void *)pmu; + struct gk20a_pmu_priv *priv = (void *)object; + struct nvkm_mc *pmc = nvkm_mc(object); int ret; - ret = nvkm_subdev_init(&pmu->base); + ret = nvkm_subdev_init(&priv->base.base); if (ret) return ret; - pmu->pgob = nvkm_pmu_pgob; + priv->pmu_state = PMU_STATE_STARTING; + ret = gk20a_init_pmu_setup_hw1(priv, pmc); + if (ret) + return ret; + + nv_wr32(priv, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001); + nv_wr32(priv, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002); + nv_wr32(priv, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003); - /* init pwr perf counter */ - nv_wr32(pmu, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001); - nv_wr32(pmu, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002); - nv_wr32(pmu, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003); + nvkm_timer_alarm(priv, 2000000000, &priv->alarm); - nvkm_timer_alarm(pmu, 2000000000, &priv->alarm); return ret; } +static int +gk20a_pmu_fini(struct nvkm_object *object, bool suspend) +{ + struct gk20a_pmu_priv *priv = (void *)object; + struct nvkm_mc *pmc = nvkm_mc(object); + + nvkm_timer_alarm_cancel(priv, &priv->alarm); + + cancel_work_sync(&priv->base.recv.work); + + mutex_lock(&priv->isr_mutex); + gk20a_pmu_enable(priv, pmc, false); + priv->isr_enabled = false; + mutex_unlock(&priv->isr_mutex); + + priv->pmu_state = PMU_STATE_OFF; + priv->pmu_ready = false; + nv_wr32(priv, 0x10a014, 0x00000060); + + return nvkm_subdev_fini(&priv->base.base, suspend); +} + +static void +gk20a_pmu_dtor(struct nvkm_object *object) +{ + struct gk20a_pmu_priv *priv = (void *)object; + + nvkm_gpuobj_unmap(&priv->trace_buf.vma); + nvkm_gpuobj_ref(NULL, &priv->trace_buf.obj); + + nvkm_gpuobj_unmap(&priv->ucode.vma); + nvkm_gpuobj_ref(NULL, &priv->ucode.obj); + nvkm_vm_ref(NULL, &priv->pmuvm.vm, priv->pmuvm.pgd); + nvkm_gpuobj_ref(NULL, &priv->pmuvm.pgd); + nvkm_gpuobj_ref(NULL, &priv->pmuvm.mem); +} + static struct gk20a_pmu_dvfs_data -gk20a_dvfs_data= { +gk20a_dvfs_data = { .p_load_target = 70, .p_load_max = 90, .p_smooth = 1, @@ 
-205,6 +930,9 @@ gk20a_pmu_ctor(struct nvkm_object *parent, struct nvkm_object *engine, struct nvkm_object **pobject) { struct gk20a_pmu_priv *priv; + struct nvkm_pmu *pmu; + struct nvkm_mc *pmc; + const struct firmware *pmufw = NULL; int ret; ret = nvkm_pmu_create(parent, engine, oclass, &priv); @@ -212,10 +940,47 @@ gk20a_pmu_ctor(struct nvkm_object *parent, struct nvkm_object *engine, if (ret) return ret; + mutex_init(&priv->isr_mutex); + mutex_init(&priv->pmu_copy_lock); priv->data = &gk20a_dvfs_data; + pmu = &priv->base; + pmc = nvkm_mc(pmu); + nv_subdev(pmu)->intr = gk20a_pmu_intr; + ret = gk20a_pmu_load_firmware(pmu, &pmufw); + if (ret < 0) { + nv_error(priv, "failed to load pmu fimware\n"); + return ret; + } + + ret = gk20a_pmu_init_vm(priv, pmufw); + if (ret < 0) { + nv_error(priv, "failed to map pmu fw to va space\n"); + goto err; + } + + priv->desc = (struct pmu_ucode_desc *)pmufw->data; + gk20a_pmu_dump_firmware_info(pmu, pmufw); + + if (priv->desc->app_version != APP_VERSION_GK20A) { + nv_error(priv, "PMU version unsupported: %d\n", + priv->desc->app_version); + ret = -EINVAL; + goto err; + } + + ret = gk20a_init_pmu_setup_sw(priv); + if (ret) + goto err; + + pmu->pgob = nvkm_pmu_pgob; nvkm_alarm_init(&priv->alarm, gk20a_pmu_dvfs_work); + return 0; + +err: + gk20a_pmu_release_firmware(pmu, pmufw); + return ret; } struct nvkm_oclass * @@ -223,8 +988,10 @@ gk20a_pmu_oclass = &(struct nvkm_pmu_impl) { .base.handle = NV_SUBDEV(PMU, 0xea), .base.ofuncs = &(struct nvkm_ofuncs) { .ctor = gk20a_pmu_ctor, - .dtor = _nvkm_pmu_dtor, + .dtor = gk20a_pmu_dtor, .init = gk20a_pmu_init, .fini = gk20a_pmu_fini, }, + .pgob = gk20a_pmu_pgob, }.base; +