diff mbox series

[RFC,7/7] hw/mem/cxl_type3: add read/write support to dynamic capacity

Message ID 20230511175609.2091136-8-fan.ni@samsung.com
State New
Headers show
Series [RFC,1/7] hw/cxl/cxl-mailbox-utils: Add dc_event_log_size field to output payload of identify memory device command | expand

Commit Message

Fan Ni May 11, 2023, 5:56 p.m. UTC
From: Fan Ni <nifan@outlook.com>

Before the change, read from or write to dynamic capacity of the memory
device is not support as 1) no host backed file/memory is provided for
it; 2) no address space is created for the dynamic capacity.

With the change, add code to support following:
1. add a new property to type3 device "dc-memdev" to point to host
   memory backend for dynamic capacity;
2. add a bitmap for each region to track whether a block is host backed,
which will be used for address check when read/write dynamic capacity;
3. add namespace for dynamic capacity for read/write support;
4. create cdat entries for each dynamic capacity region;

Signed-off-by: Fan Ni <fan.ni@samsung.com>
---
 hw/cxl/cxl-mailbox-utils.c  |  21 ++-
 hw/mem/cxl_type3.c          | 336 +++++++++++++++++++++++++++++-------
 include/hw/cxl/cxl_device.h |   8 +-
 3 files changed, 298 insertions(+), 67 deletions(-)

Comments

nifan@outlook.com June 28, 2023, 5:09 p.m. UTC | #1
The 05/15/2023 16:22, Jonathan Cameron wrote:
> On Thu, 11 May 2023 17:56:40 +0000
> Fan Ni <fan.ni@samsung.com> wrote:
> 
> > From: Fan Ni <nifan@outlook.com>
> > 
> > Before the change, read from or write to dynamic capacity of the memory
> > device is not support as 1) no host backed file/memory is provided for
> > it; 2) no address space is created for the dynamic capacity.
> 
> Ah nice. I should have read ahead.  Probably makes sense to reorder things
> so that when we present DCD region it will work.

We can back dynamic capacity with host memory/file and create address
space for dc regions, but until extents can be added we should not expect
any read/write can happen to the dynamic capacity, right?

Fan
> 
> > 
> > With the change, add code to support following:
> > 1. add a new property to type3 device "dc-memdev" to point to host
> >    memory backend for dynamic capacity;
> > 2. add a bitmap for each region to track whether a block is host backed,
> > which will be used for address check when read/write dynamic capacity;
> > 3. add namespace for dynamic capacity for read/write support;
> > 4. create cdat entries for each dynamic capacity region;
> > 
> > Signed-off-by: Fan Ni <fan.ni@samsung.com>
> > ---
> >  hw/cxl/cxl-mailbox-utils.c  |  21 ++-
> >  hw/mem/cxl_type3.c          | 336 +++++++++++++++++++++++++++++-------
> >  include/hw/cxl/cxl_device.h |   8 +-
> >  3 files changed, 298 insertions(+), 67 deletions(-)
> > 
> > diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> > index 7212934627..efe61e67fb 100644
> > --- a/hw/cxl/cxl-mailbox-utils.c
> > +++ b/hw/cxl/cxl-mailbox-utils.c
> > @@ -391,9 +391,11 @@ static CXLRetCode cmd_firmware_update_get_info(struct cxl_cmd *cmd,
> >          char fw_rev4[0x10];
> >      } QEMU_PACKED *fw_info;
> >      QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50);
> > +	CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
> >  
> >      if ((cxl_dstate->vmem_size < CXL_CAPACITY_MULTIPLIER) ||
> > -        (cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER)) {
> > +			(cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER) ||
> 
> Keep old alignment
> 
> > +		(ct3d->dc.total_dynamic_capicity < CXL_CAPACITY_MULTIPLIER)) {
> 
> We should think about the separation between what goes in cxl_dstate and directly
> in ct3d. That boundary has been blurring for a while and getting some review
> comments.
> 
> >          return CXL_MBOX_INTERNAL_ERROR;
> >      }
> >  
> > @@ -534,7 +536,9 @@ static CXLRetCode cmd_identify_memory_device(struct cxl_cmd *cmd,
> >      CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
> >  
> >      if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
> > -        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
> > +		(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
> > +		(!QEMU_IS_ALIGNED(ct3d->dc.total_dynamic_capicity,
> > +						CXL_CAPACITY_MULTIPLIER))) {
> >          return CXL_MBOX_INTERNAL_ERROR;
> >      }
> >  
> > @@ -543,7 +547,8 @@ static CXLRetCode cmd_identify_memory_device(struct cxl_cmd *cmd,
> >  
> >      snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0);
> >  
> > -    stq_le_p(&id->total_capacity, cxl_dstate->mem_size / CXL_CAPACITY_MULTIPLIER);
> > +	stq_le_p(&id->total_capacity,
> > +			cxl_dstate->static_mem_size / CXL_CAPACITY_MULTIPLIER);
> 
> Pull the rename out as a precursor patch.
> 
> >      stq_le_p(&id->persistent_capacity, cxl_dstate->pmem_size / CXL_CAPACITY_MULTIPLIER);
> >      stq_le_p(&id->volatile_capacity, cxl_dstate->vmem_size / CXL_CAPACITY_MULTIPLIER);
> >      stl_le_p(&id->lsa_size, cvc->get_lsa_size(ct3d));
> > @@ -568,9 +573,12 @@ static CXLRetCode cmd_ccls_get_partition_info(struct cxl_cmd *cmd,
> >          uint64_t next_pmem;
> >      } QEMU_PACKED *part_info = (void *)cmd->payload;
> >      QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20);
> > +	CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
> >  
> >      if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
> > -        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
> > +		(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
> > +		(!QEMU_IS_ALIGNED(ct3d->dc.total_dynamic_capicity,
> > +						CXL_CAPACITY_MULTIPLIER))) {
> >          return CXL_MBOX_INTERNAL_ERROR;
> >      }
> >  
> > @@ -881,9 +889,8 @@ static CXLRetCode cmd_media_clear_poison(struct cxl_cmd *cmd,
> >      struct clear_poison_pl *in = (void *)cmd->payload;
> >  
> >      dpa = ldq_le_p(&in->dpa);
> > -    if (dpa + 64 > cxl_dstate->mem_size) {
> > -        return CXL_MBOX_INVALID_PA;
> > -    }
> > +	if (dpa + 64 > cxl_dstate->static_mem_size && ct3d->dc.num_regions == 0)
> 
> This test will need expanding to include DPAs in DC regions.
> 
> > +		return CXL_MBOX_INVALID_PA;
> >  
> >      QLIST_FOREACH(ent, poison_list, node) {
> >          /*
> > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > index 70d47d43b9..334660bd0f 100644
> > --- a/hw/mem/cxl_type3.c
> > +++ b/hw/mem/cxl_type3.c
> > @@ -33,8 +33,8 @@ enum {
> >  };
> >  
> >  static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
> > -                                         int dsmad_handle, MemoryRegion *mr,
> > -                                         bool is_pmem, uint64_t dpa_base)
> > +		int dsmad_handle, uint8_t flags,
> > +		uint64_t dpa_base, uint64_t size)
> >  {
> >      g_autofree CDATDsmas *dsmas = NULL;
> >      g_autofree CDATDslbis *dslbis0 = NULL;
> > @@ -53,9 +53,9 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
> >              .length = sizeof(*dsmas),
> >          },
> >          .DSMADhandle = dsmad_handle,
> > -        .flags = is_pmem ? CDAT_DSMAS_FLAG_NV : 0,
> > +		.flags = flags,
> >          .DPA_base = dpa_base,
> > -        .DPA_length = int128_get64(mr->size),
> > +		.DPA_length = size,
> >      };
> >  
> >      /* For now, no memory side cache, plausiblish numbers */
> > @@ -137,9 +137,9 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
> >           * NV: Reserved - the non volatile from DSMAS matters
> >           * V: EFI_MEMORY_SP
> >           */
> > -        .EFI_memory_type_attr = is_pmem ? 2 : 1,
> > +		.EFI_memory_type_attr = flags ? 2 : 1,
> 
> Fix all these alignment changes (spaces vs tabs)
> 
> >          .DPA_offset = 0,
> > -        .DPA_length = int128_get64(mr->size),
> > +		.DPA_length = size,
> >      };
> >  
> >      /* Header always at start of structure */
> > @@ -158,14 +158,15 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
> >      g_autofree CDATSubHeader **table = NULL;
> >      CXLType3Dev *ct3d = priv;
> >      MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL;
> > +	MemoryRegion *dc_mr = NULL;
> >      int dsmad_handle = 0;
> >      int cur_ent = 0;
> >      int len = 0;
> >      int rc, i;
> > +	uint64_t vmr_size = 0, pmr_size = 0;
> >  
> > -    if (!ct3d->hostpmem && !ct3d->hostvmem) {
> > -        return 0;
> > -    }
> > +	if (!ct3d->hostpmem && !ct3d->hostvmem && !ct3d->dc.num_regions)
> > +		return 0;
> >  
> >      if (ct3d->hostvmem) {
> >          volatile_mr = host_memory_backend_get_memory(ct3d->hostvmem);
> > @@ -173,6 +174,7 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
> >              return -EINVAL;
> >          }
> >          len += CT3_CDAT_NUM_ENTRIES;
> > +		vmr_size = volatile_mr->size;
> >      }
> >  
> >      if (ct3d->hostpmem) {
> > @@ -181,7 +183,19 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
> >              return -EINVAL;
> >          }
> >          len += CT3_CDAT_NUM_ENTRIES;
> > -    }
> > +		pmr_size = nonvolatile_mr->size;
> > +	}
> > +
> > +	if (ct3d->dc.num_regions) {
> > +		if (ct3d->dc.host_dc) {
> > +			dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
> > +			if (!dc_mr)
> > +				return -EINVAL;
> > +			len += CT3_CDAT_NUM_ENTRIES * ct3d->dc.num_regions;
> > +		} else {
> > +			return -EINVAL;
> > +		}
> > +	}
> >  
> >      table = g_malloc0(len * sizeof(*table));
> >      if (!table) {
> > @@ -189,23 +203,45 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
> >      }
> >  
> >      /* Now fill them in */
> > -    if (volatile_mr) {
> > -        rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++, volatile_mr,
> > -                                           false, 0);
> > -        if (rc < 0) {
> > -            return rc;
> > -        }
> > -        cur_ent = CT3_CDAT_NUM_ENTRIES;
> > -    }
> > +	if (volatile_mr) {
> > +		rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++,
> > +				0, 0, vmr_size);
> > +		if (rc < 0)
> > +			return rc;
> Without the tabs / spaces accidental conversion this diff should look a lot
> clearer..
> > +		cur_ent = CT3_CDAT_NUM_ENTRIES;
> > +	}
> > +
> > +	if (nonvolatile_mr) {
> > +		rc = ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++,
> > +				CDAT_DSMAS_FLAG_NV, vmr_size, pmr_size);
> > +		if (rc < 0)
> > +			goto error_cleanup;
> > +		cur_ent += CT3_CDAT_NUM_ENTRIES;
> > +	}
> > +
> > +	if (dc_mr) {
> > +		uint64_t region_base = vmr_size + pmr_size;
> > +
> > +		/*
> > +		 * Currently we create cdat entries for each region, should we only
> > +		 * create dsmas table instead??
> 
> I don't think it does any harm to have a lot of similar entries. We may want to reconsider
> this in the longer term to make sure that more complex code paths are handled where
> things are shared.  What combinations does the spec allow?
> One entry for all regions with them all sharing a single dsmad handle?
> 
> 
> > +		 * We assume all dc regions are non-volatile for now.
> > +		 *
> > +		 */
> > +		for (i = 0; i < ct3d->dc.num_regions; i++) {
> > +			rc = ct3_build_cdat_entries_for_mr(&(table[cur_ent])
> > +					, dsmad_handle++
> > +					, CDAT_DSMAS_FLAG_NV|CDAT_DSMAS_FLAG_DYNAMIC_CAP
> > +					, region_base, ct3d->dc.regions[i].len);
> > +			if (rc < 0)
> > +				goto error_cleanup;
> > +			ct3d->dc.regions[i].dsmadhandle = dsmad_handle-1;
> > +
> > +			cur_ent += CT3_CDAT_NUM_ENTRIES;
> > +			region_base += ct3d->dc.regions[i].len;
> > +		}
> > +	}
> >  
> > -    if (nonvolatile_mr) {
> > -        rc = ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++,
> > -                nonvolatile_mr, true, (volatile_mr ? volatile_mr->size : 0));
> > -        if (rc < 0) {
> > -            goto error_cleanup;
> > -        }
> > -        cur_ent += CT3_CDAT_NUM_ENTRIES;
> > -    }
> >      assert(len == cur_ent);
> >  
> >      *cdat_table = g_steal_pointer(&table);
> > @@ -706,6 +742,11 @@ static int cxl_create_toy_regions(CXLType3Dev *ct3d)
> >  		/* dsmad_handle is set when creating cdat table entries */
> >  		region->flags = 0;
> >  
> > +		region->blk_bitmap = bitmap_new(region->len / region->block_size);
> > +		if (!region->blk_bitmap)
> > +			return -1;
> > +		bitmap_zero(region->blk_bitmap, region->len / region->block_size);
> > +
> >  		region_base += region->len;
> >  	}
> >  	QTAILQ_INIT(&ct3d->dc.extents);
> > @@ -713,11 +754,24 @@ static int cxl_create_toy_regions(CXLType3Dev *ct3d)
> >  	return 0;
> >  }
> >  
> > +static void cxl_destroy_toy_regions(CXLType3Dev *ct3d)
> 
> Why toy?  They work after this so no longer toys ;)
> 
> > +{
> > +	int i;
> > +	struct CXLDCD_Region *region;
> > +
> > +	for (i = 0; i < ct3d->dc.num_regions; i++) {
> > +		region = &ct3d->dc.regions[i];
> > +		if (region->blk_bitmap)
> > +			g_free(region->blk_bitmap);
> Why is check needed? Is there a path where we call this function
> without the bitmap having been allocated successfully?
> 
> > +	}
> > +}
> > +
> >  static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
> >  {
> >      DeviceState *ds = DEVICE(ct3d);
> >  
> > -    if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem) {
> > +	if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem
> > +			&& !ct3d->dc.num_regions) {
> >          error_setg(errp, "at least one memdev property must be set");
> >          return false;
> >      } else if (ct3d->hostmem && ct3d->hostpmem) {
> > @@ -754,7 +808,7 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
> >          }
> >          address_space_init(&ct3d->hostvmem_as, vmr, v_name);
> >          ct3d->cxl_dstate.vmem_size = vmr->size;
> > -        ct3d->cxl_dstate.mem_size += vmr->size;
> > +		ct3d->cxl_dstate.static_mem_size += vmr->size;
> >          g_free(v_name);
> >      }
> >  
> > @@ -777,12 +831,47 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
> >          }
> >          address_space_init(&ct3d->hostpmem_as, pmr, p_name);
> >          ct3d->cxl_dstate.pmem_size = pmr->size;
> > -        ct3d->cxl_dstate.mem_size += pmr->size;
> > +		ct3d->cxl_dstate.static_mem_size += pmr->size;
> >          g_free(p_name);
> >      }
> >  
> > -	if (cxl_create_toy_regions(ct3d))
> > -		return false;
> > +	ct3d->dc.total_dynamic_capicity = 0;
> > +	if (ct3d->dc.host_dc) {
> > +		MemoryRegion *dc_mr;
> > +		char *dc_name;
> > +		uint64_t total_region_size = 0;
> > +		int i;
> > +
> > +		dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
> > +		if (!dc_mr) {
> > +			error_setg(errp, "dynamic capacity must have backing device");
> > +			return false;
> 
> > +		}
> > +		/* FIXME: set dc as nonvolatile for now */
> That's fine. I think to do anything else we'll want multiple backends anyway.
> Perhaps rename the parameter to reflect that it's volatile for now though otherwise
> we'll end up deprecating another memory region command line parameter and people will
> begin to get grumpy ;)
> 
> > +		memory_region_set_nonvolatile(dc_mr, true);
> > +		memory_region_set_enabled(dc_mr, true);
> > +		host_memory_backend_set_mapped(ct3d->dc.host_dc, true);
> > +		if (ds->id) {
> > +			dc_name = g_strdup_printf("cxl-dcd-dpa-dc-space:%s", ds->id);
> > +		} else {
> > +			dc_name = g_strdup("cxl-dcd-dpa-dc-space");
> > +		}
> > +		address_space_init(&ct3d->dc.host_dc_as, dc_mr, dc_name);
> > +
> > +		if (cxl_create_toy_regions(ct3d)) {
> > +			return false;
> > +		}
> > +
> > +		for (i = 0; i < ct3d->dc.num_regions; i++) {
> > +			total_region_size += ct3d->dc.regions[i].len;
> > +		}
> > +		/* Make sure the host backend is large enough to cover all dc range */
> > +		assert(total_region_size <= dc_mr->size);
> > +		assert(dc_mr->size % (256*1024*1024) == 0);
> > +
> > +		ct3d->dc.total_dynamic_capicity = total_region_size;
> > +		g_free(dc_name);
> > +	}
> >  
> >      return true;
> >  }
> > @@ -890,6 +979,10 @@ err_release_cdat:
> >  err_free_special_ops:
> >      g_free(regs->special_ops);
> >  err_address_space_free:
> > +	if (ct3d->dc.host_dc) {
> > +		cxl_destroy_toy_regions(ct3d);
> > +		address_space_destroy(&ct3d->dc.host_dc_as);
> > +	}
> >      if (ct3d->hostpmem) {
> >          address_space_destroy(&ct3d->hostpmem_as);
> >      }
> > @@ -909,6 +1002,10 @@ static void ct3_exit(PCIDevice *pci_dev)
> >      cxl_doe_cdat_release(cxl_cstate);
> >      spdm_sock_fini(ct3d->doe_spdm.socket);
> >      g_free(regs->special_ops);
> > +	if (ct3d->dc.host_dc) {
> > +		cxl_destroy_toy_regions(ct3d);
> > +		address_space_destroy(&ct3d->dc.host_dc_as);
> > +	}
> >      if (ct3d->hostpmem) {
> >          address_space_destroy(&ct3d->hostpmem_as);
> >      }
> > @@ -917,6 +1014,100 @@ static void ct3_exit(PCIDevice *pci_dev)
> >      }
> >  }
> >  
> > +static void set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > +		uint64_t len)
> > +{
> > +	int i;
> > +	CXLDCD_Region *region = NULL;
> > +
> > +	if (dpa < ct3d->dc.regions[0].base
> > +		|| dpa >= ct3d->dc.regions[0].base + ct3d->dc.total_dynamic_capicity)
> > +		return;
> > +
> > +	/*
> > +	 * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > +	 * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > +	 * Region 7 for the highest DPA.
> > +	 * So we check from the last region to find where the dpa belongs.
> > +	 * access across multiple regions is not allowed.
> > +	 **/
> > +	for (i = ct3d->dc.num_regions-1; i >= 0; i--) {
> > +		region = &ct3d->dc.regions[i];
> > +		if (dpa >= region->base)
> > +			break;
> > +	}
> > +
> > +	bitmap_set(region->blk_bitmap, (dpa-region->base)/region->block_size,
> > +			len/region->block_size);
> > +}
> > +
> > +static bool test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > +		uint64_t len)
> > +{
> > +	int i;
> > +	CXLDCD_Region *region = NULL;
> > +	uint64_t nbits;
> > +	long nr;
> > +
> > +	if (dpa < ct3d->dc.regions[0].base
> > +		   || dpa >= ct3d->dc.regions[0].base + ct3d->dc.total_dynamic_capicity)
> > +		return false;
> > +
> > +	/*
> > +	 * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > +	 * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > +	 * Region 7 for the highest DPA.
> > +	 * So we check from the last region to find where the dpa belongs.
> > +	 * access across multiple regions is not allowed.
> > +	 **/
> > +	for (i = ct3d->dc.num_regions-1; i >= 0; i--) {
> > +		region = &ct3d->dc.regions[i];
> > +		if (dpa >= region->base)
> > +			break;
> > +	}
> > +
> > +	nr = (dpa-region->base)/region->block_size;
> > +	nbits = (len + region->block_size-1)/region->block_size;
> > +	if (find_next_zero_bit(region->blk_bitmap, nr+nbits, nr)
> > +			>= nr+nbits)
> > +		return true;
> > +
> > +	return false;
> 
> return find_next_zero_bit(...) >= nr + nbits;
> 
> > +}
> > +
> > +static void clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > +		uint64_t len)
> > +{
> > +	int i;
> > +	CXLDCD_Region *region = NULL;
> > +	uint64_t nbits;
> > +	long nr;
> > +
> > +	if (dpa < ct3d->dc.regions[0].base
> > +		|| dpa >= ct3d->dc.regions[0].base + ct3d->dc.total_dynamic_capicity)
> > +		return;
> > +
> > +	/*
> > +	 * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > +	 * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > +	 * Region 7 for the highest DPA.
> > +	 * So we check from the last region to find where the dpa belongs.
> > +	 * access across multiple regions is not allowed.
> > +	 **/
> > +	for (i = ct3d->dc.num_regions-1; i >= 0; i--) {
> > +		region = &ct3d->dc.regions[i];
> > +		if (dpa >= region->base)
> > +			break;
> > +	}
> > +
> > +	nr = (dpa-region->base) / region->block_size;
> > +	nbits = (len + region->block_size-1) / region->block_size;
> 
> Why handle non precise multiple?  
> 
> > +	for (i = 0; i < nbits; i++) {
> > +		clear_bit(nr, region->blk_bitmap);
> > +		nr++;
> > +	}
> 
> bitmap_clear()?
> 
> 
> 
> > +
> >  static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
> >  {
> >      uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
> > @@ -973,16 +1164,24 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
> >                                         AddressSpace **as,
> >                                         uint64_t *dpa_offset)
> >  {
> > -    MemoryRegion *vmr = NULL, *pmr = NULL;
> > +	MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL;
> > +	uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0;
> >  
> >      if (ct3d->hostvmem) {
> >          vmr = host_memory_backend_get_memory(ct3d->hostvmem);
> > +		vmr_size = int128_get64(vmr->size);
> >      }
> >      if (ct3d->hostpmem) {
> >          pmr = host_memory_backend_get_memory(ct3d->hostpmem);
> > +		pmr_size = int128_get64(pmr->size);
> >      }
> > +	if (ct3d->dc.host_dc) {
> > +		dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
> > +		/* Do we want dc_size to be dc_mr->size or not?? */
> > +		dc_size = ct3d->dc.total_dynamic_capicity;
> > +	}
> >  
> > -    if (!vmr && !pmr) {
> > +	if (!vmr && !pmr && !dc_mr) {
> >          return -ENODEV;
> >      }
> >  
> > @@ -990,20 +1189,22 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
> >          return -EINVAL;
> >      }
> >  
> > -    if (*dpa_offset > int128_get64(ct3d->cxl_dstate.mem_size)) {
> > +    if (*dpa_offset >= vmr_size + pmr_size + dc_size ||
> > +       (*dpa_offset >= vmr_size + pmr_size && ct3d->dc.num_regions == 0)) {
> >          return -EINVAL;
> >      }
> >  
> > -    if (vmr) {
> > -        if (*dpa_offset < int128_get64(vmr->size)) {
> > -            *as = &ct3d->hostvmem_as;
> > -        } else {
> > -            *as = &ct3d->hostpmem_as;
> > -            *dpa_offset -= vmr->size;
> > -        }
> > -    } else {
> > -        *as = &ct3d->hostpmem_as;
> > -    }
> > +	if (*dpa_offset < vmr_size)
> > +		*as = &ct3d->hostvmem_as;
> > +	else if (*dpa_offset < vmr_size + pmr_size) {
> > +		*as = &ct3d->hostpmem_as;
> > +		*dpa_offset -= vmr_size;
> > +	} else {
> > +		if (!test_region_block_backed(ct3d, *dpa_offset, size))
> > +			return -ENODEV;
> > +		*as = &ct3d->dc.host_dc_as;
> > +		*dpa_offset -= (vmr_size + pmr_size);
> > +	}
> >  
> >      return 0;
> >  }
> > @@ -1069,6 +1270,8 @@ static Property ct3_props[] = {
> >      DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
> >      DEFINE_PROP_UINT16("spdm", CXLType3Dev, spdm_port, 0),
> >  	DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0),
> > +	DEFINE_PROP_LINK("dc-memdev", CXLType3Dev, dc.host_dc,
> > +					TYPE_MEMORY_BACKEND, HostMemoryBackend *),
> 
> Perhaps volatile-dc-memdev?  leaves us space for a persistent one in future.
> If anyone every cares that is ;)
> 
> >      DEFINE_PROP_END_OF_LIST(),
> >  };
> >  
> > @@ -1135,34 +1338,41 @@ static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size,
> >  
> >  static bool set_cacheline(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t *data)
> >  {
> > -    MemoryRegion *vmr = NULL, *pmr = NULL;
> > +	MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL;
> >      AddressSpace *as;
> > +	uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0;
> >  
> >      if (ct3d->hostvmem) {
> >          vmr = host_memory_backend_get_memory(ct3d->hostvmem);
> > +		vmr_size = int128_get64(vmr->size);
> >      }
> >      if (ct3d->hostpmem) {
> >          pmr = host_memory_backend_get_memory(ct3d->hostpmem);
> > +		pmr_size = int128_get64(pmr->size);
> >      }
> > +	if (ct3d->dc.host_dc) {
> > +		dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
> > +		dc_size = ct3d->dc.total_dynamic_capicity;
> > +	}
> >  
> > -    if (!vmr && !pmr) {
> > +    if (!vmr && !pmr && !dc_mr) {
> >          return false;
> >      }
> >  
> > -    if (dpa_offset + 64 > int128_get64(ct3d->cxl_dstate.mem_size)) {
> > -        return false;
> > -    }
> > +	if (dpa_offset >= vmr_size + pmr_size + dc_size)
> > +		return false;
> > +	if (dpa_offset + 64 >= vmr_size + pmr_size && ct3d->dc.num_regions == 0)
> > +		return false;
> >  
> > -    if (vmr) {
> > -        if (dpa_offset <= int128_get64(vmr->size)) {
> > -            as = &ct3d->hostvmem_as;
> > -        } else {
> > -            as = &ct3d->hostpmem_as;
> > -            dpa_offset -= vmr->size;
> > -        }
> > -    } else {
> > -        as = &ct3d->hostpmem_as;
> > -    }
> > +	if (dpa_offset < vmr_size) {
> > +		as = &ct3d->hostvmem_as;
> > +	} else if (dpa_offset < vmr_size + pmr_size) {
> > +		as = &ct3d->hostpmem_as;
> > +		dpa_offset -= vmr->size;
> > +	} else {
> > +		as = &ct3d->dc.host_dc_as;
> > +		dpa_offset -= (vmr_size + pmr_size);
> > +	}
> >  
> >      address_space_write(as, dpa_offset, MEMTXATTRS_UNSPECIFIED, &data, 64);
> >      return true;
> > @@ -1711,6 +1921,14 @@ static void qmp_cxl_process_dynamic_capacity_event(const char *path, CxlEventLog
> >  		memcpy(&dCap.dynamic_capacity_extent, &extents[i]
> >  				, sizeof(CXLDCExtent_raw));
> >  
> > +		if (dCap.type == 0x0)
> 
> Enum values as suggested in earlier patch.
> 
> > +			set_region_block_backed(dcd, extents[i].start_dpa, extents[i].len);
> > +		else if (dCap.type == 0x1)
> > +			clear_region_block_backed(dcd, extents[i].start_dpa,
> > +					extents[i].len);
> > +		else
> > +			error_setg(errp, "DC event not support yet, no bitmap op");
> > +
> >  		if (cxl_event_insert(cxlds, CXL_EVENT_TYPE_DYNAMIC_CAP,
> >  					(CXLEventRecordRaw *)&dCap)) {
> >  			;
> > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> > index c0c8fcc24b..d9b6776e2c 100644
> > --- a/include/hw/cxl/cxl_device.h
> > +++ b/include/hw/cxl/cxl_device.h
> > @@ -211,7 +211,7 @@ typedef struct cxl_device_state {
> >      } timestamp;
> >  
> >      /* memory region size, HDM */
> > -    uint64_t mem_size;
> > +	uint64_t static_mem_size;
> >      uint64_t pmem_size;
> >      uint64_t vmem_size;
> >  
> > @@ -412,6 +412,7 @@ typedef struct CXLDCD_Region {
> >  	uint64_t block_size;
> >  	uint32_t dsmadhandle;
> >  	uint8_t flags;
> > +	unsigned long *blk_bitmap;
> >  } CXLDCD_Region;
> >  
> >  struct CXLType3Dev {
> > @@ -447,12 +448,17 @@ struct CXLType3Dev {
> >      uint64_t poison_list_overflow_ts;
> >  
> >  	struct dynamic_capacity {
> > +		HostMemoryBackend *host_dc;
> > +		AddressSpace host_dc_as;
> > +
> > +		uint8_t num_hosts; //Table 7-55
> 
> Not visible here as far as I can see. So leave it for now.
> 
> >  		uint8_t num_regions; // 1-8
> >  		struct CXLDCD_Region regions[DCD_MAX_REGION_NUM];
> >  		CXLDCDExtentList extents;
> >  
> >  		uint32_t total_extent_count;
> >  		uint32_t ext_list_gen_seq;
> > +		uint64_t total_dynamic_capicity; // 256M aligned
> capacity
> 
> >  	} dc;
> >  };
> >  
>
diff mbox series

Patch

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 7212934627..efe61e67fb 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -391,9 +391,11 @@  static CXLRetCode cmd_firmware_update_get_info(struct cxl_cmd *cmd,
         char fw_rev4[0x10];
     } QEMU_PACKED *fw_info;
     QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50);
+	CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
 
     if ((cxl_dstate->vmem_size < CXL_CAPACITY_MULTIPLIER) ||
-        (cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER)) {
+			(cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER) ||
+		(ct3d->dc.total_dynamic_capicity < CXL_CAPACITY_MULTIPLIER)) {
         return CXL_MBOX_INTERNAL_ERROR;
     }
 
@@ -534,7 +536,9 @@  static CXLRetCode cmd_identify_memory_device(struct cxl_cmd *cmd,
     CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
 
     if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
-        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
+		(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
+		(!QEMU_IS_ALIGNED(ct3d->dc.total_dynamic_capicity,
+						CXL_CAPACITY_MULTIPLIER))) {
         return CXL_MBOX_INTERNAL_ERROR;
     }
 
@@ -543,7 +547,8 @@  static CXLRetCode cmd_identify_memory_device(struct cxl_cmd *cmd,
 
     snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0);
 
-    stq_le_p(&id->total_capacity, cxl_dstate->mem_size / CXL_CAPACITY_MULTIPLIER);
+	stq_le_p(&id->total_capacity,
+			cxl_dstate->static_mem_size / CXL_CAPACITY_MULTIPLIER);
     stq_le_p(&id->persistent_capacity, cxl_dstate->pmem_size / CXL_CAPACITY_MULTIPLIER);
     stq_le_p(&id->volatile_capacity, cxl_dstate->vmem_size / CXL_CAPACITY_MULTIPLIER);
     stl_le_p(&id->lsa_size, cvc->get_lsa_size(ct3d));
@@ -568,9 +573,12 @@  static CXLRetCode cmd_ccls_get_partition_info(struct cxl_cmd *cmd,
         uint64_t next_pmem;
     } QEMU_PACKED *part_info = (void *)cmd->payload;
     QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20);
+	CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
 
     if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
-        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
+		(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
+		(!QEMU_IS_ALIGNED(ct3d->dc.total_dynamic_capicity,
+						CXL_CAPACITY_MULTIPLIER))) {
         return CXL_MBOX_INTERNAL_ERROR;
     }
 
@@ -881,9 +889,8 @@  static CXLRetCode cmd_media_clear_poison(struct cxl_cmd *cmd,
     struct clear_poison_pl *in = (void *)cmd->payload;
 
     dpa = ldq_le_p(&in->dpa);
-    if (dpa + 64 > cxl_dstate->mem_size) {
-        return CXL_MBOX_INVALID_PA;
-    }
+	if (dpa + 64 > cxl_dstate->static_mem_size && ct3d->dc.num_regions == 0)
+		return CXL_MBOX_INVALID_PA;
 
     QLIST_FOREACH(ent, poison_list, node) {
         /*
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 70d47d43b9..334660bd0f 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -33,8 +33,8 @@  enum {
 };
 
 static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
-                                         int dsmad_handle, MemoryRegion *mr,
-                                         bool is_pmem, uint64_t dpa_base)
+		int dsmad_handle, uint8_t flags,
+		uint64_t dpa_base, uint64_t size)
 {
     g_autofree CDATDsmas *dsmas = NULL;
     g_autofree CDATDslbis *dslbis0 = NULL;
@@ -53,9 +53,9 @@  static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
             .length = sizeof(*dsmas),
         },
         .DSMADhandle = dsmad_handle,
-        .flags = is_pmem ? CDAT_DSMAS_FLAG_NV : 0,
+		.flags = flags,
         .DPA_base = dpa_base,
-        .DPA_length = int128_get64(mr->size),
+		.DPA_length = size,
     };
 
     /* For now, no memory side cache, plausiblish numbers */
@@ -137,9 +137,9 @@  static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
          * NV: Reserved - the non volatile from DSMAS matters
          * V: EFI_MEMORY_SP
          */
-        .EFI_memory_type_attr = is_pmem ? 2 : 1,
+		.EFI_memory_type_attr = flags ? 2 : 1,
         .DPA_offset = 0,
-        .DPA_length = int128_get64(mr->size),
+		.DPA_length = size,
     };
 
     /* Header always at start of structure */
@@ -158,14 +158,15 @@  static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
     g_autofree CDATSubHeader **table = NULL;
     CXLType3Dev *ct3d = priv;
     MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL;
+	MemoryRegion *dc_mr = NULL;
     int dsmad_handle = 0;
     int cur_ent = 0;
     int len = 0;
     int rc, i;
+	uint64_t vmr_size = 0, pmr_size = 0;
 
-    if (!ct3d->hostpmem && !ct3d->hostvmem) {
-        return 0;
-    }
+	if (!ct3d->hostpmem && !ct3d->hostvmem && !ct3d->dc.num_regions)
+		return 0;
 
     if (ct3d->hostvmem) {
         volatile_mr = host_memory_backend_get_memory(ct3d->hostvmem);
@@ -173,6 +174,7 @@  static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
             return -EINVAL;
         }
         len += CT3_CDAT_NUM_ENTRIES;
+		vmr_size = volatile_mr->size;
     }
 
     if (ct3d->hostpmem) {
@@ -181,7 +183,19 @@  static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
             return -EINVAL;
         }
         len += CT3_CDAT_NUM_ENTRIES;
-    }
+		pmr_size = nonvolatile_mr->size;
+	}
+
+	if (ct3d->dc.num_regions) {
+		if (ct3d->dc.host_dc) {
+			dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
+			if (!dc_mr)
+				return -EINVAL;
+			len += CT3_CDAT_NUM_ENTRIES * ct3d->dc.num_regions;
+		} else {
+			return -EINVAL;
+		}
+	}
 
     table = g_malloc0(len * sizeof(*table));
     if (!table) {
@@ -189,23 +203,45 @@  static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
     }
 
     /* Now fill them in */
-    if (volatile_mr) {
-        rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++, volatile_mr,
-                                           false, 0);
-        if (rc < 0) {
-            return rc;
-        }
-        cur_ent = CT3_CDAT_NUM_ENTRIES;
-    }
+	if (volatile_mr) {
+		rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++,
+				0, 0, vmr_size);
+		if (rc < 0)
+			return rc;
+		cur_ent = CT3_CDAT_NUM_ENTRIES;
+	}
+
+	if (nonvolatile_mr) {
+		rc = ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++,
+				CDAT_DSMAS_FLAG_NV, vmr_size, pmr_size);
+		if (rc < 0)
+			goto error_cleanup;
+		cur_ent += CT3_CDAT_NUM_ENTRIES;
+	}
+
+	if (dc_mr) {
+		uint64_t region_base = vmr_size + pmr_size;
+
+		/*
+		 * Currently we create cdat entries for each region, should we only
+		 * create dsmas table instead??
+		 * We assume all dc regions are non-volatile for now.
+		 *
+		 */
+		for (i = 0; i < ct3d->dc.num_regions; i++) {
+			rc = ct3_build_cdat_entries_for_mr(&(table[cur_ent])
+					, dsmad_handle++
+					, CDAT_DSMAS_FLAG_NV|CDAT_DSMAS_FLAG_DYNAMIC_CAP
+					, region_base, ct3d->dc.regions[i].len);
+			if (rc < 0)
+				goto error_cleanup;
+			ct3d->dc.regions[i].dsmadhandle = dsmad_handle-1;
+
+			cur_ent += CT3_CDAT_NUM_ENTRIES;
+			region_base += ct3d->dc.regions[i].len;
+		}
+	}
 
-    if (nonvolatile_mr) {
-        rc = ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++,
-                nonvolatile_mr, true, (volatile_mr ? volatile_mr->size : 0));
-        if (rc < 0) {
-            goto error_cleanup;
-        }
-        cur_ent += CT3_CDAT_NUM_ENTRIES;
-    }
     assert(len == cur_ent);
 
     *cdat_table = g_steal_pointer(&table);
@@ -706,6 +742,11 @@  static int cxl_create_toy_regions(CXLType3Dev *ct3d)
 		/* dsmad_handle is set when creating cdat table entries */
 		region->flags = 0;
 
+		region->blk_bitmap = bitmap_new(region->len / region->block_size);
+		if (!region->blk_bitmap)
+			return -1;
+		bitmap_zero(region->blk_bitmap, region->len / region->block_size);
+
 		region_base += region->len;
 	}
 	QTAILQ_INIT(&ct3d->dc.extents);
@@ -713,11 +754,24 @@  static int cxl_create_toy_regions(CXLType3Dev *ct3d)
 	return 0;
 }
 
+static void cxl_destroy_toy_regions(CXLType3Dev *ct3d)
+{
+	int i;
+	struct CXLDCD_Region *region;
+
+	for (i = 0; i < ct3d->dc.num_regions; i++) {
+		region = &ct3d->dc.regions[i];
+		if (region->blk_bitmap)
+			g_free(region->blk_bitmap);
+	}
+}
+
 static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
 {
     DeviceState *ds = DEVICE(ct3d);
 
-    if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem) {
+	if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem
+			&& !ct3d->dc.num_regions) {
         error_setg(errp, "at least one memdev property must be set");
         return false;
     } else if (ct3d->hostmem && ct3d->hostpmem) {
@@ -754,7 +808,7 @@  static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
         }
         address_space_init(&ct3d->hostvmem_as, vmr, v_name);
         ct3d->cxl_dstate.vmem_size = vmr->size;
-        ct3d->cxl_dstate.mem_size += vmr->size;
+		ct3d->cxl_dstate.static_mem_size += vmr->size;
         g_free(v_name);
     }
 
@@ -777,12 +831,47 @@  static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
         }
         address_space_init(&ct3d->hostpmem_as, pmr, p_name);
         ct3d->cxl_dstate.pmem_size = pmr->size;
-        ct3d->cxl_dstate.mem_size += pmr->size;
+		ct3d->cxl_dstate.static_mem_size += pmr->size;
         g_free(p_name);
     }
 
-	if (cxl_create_toy_regions(ct3d))
-		return false;
+	ct3d->dc.total_dynamic_capicity = 0;
+	if (ct3d->dc.host_dc) {
+		MemoryRegion *dc_mr;
+		char *dc_name;
+		uint64_t total_region_size = 0;
+		int i;
+
+		dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
+		if (!dc_mr) {
+			error_setg(errp, "dynamic capacity must have backing device");
+			return false;
+		}
+		/* FIXME: set dc as nonvolatile for now */
+		memory_region_set_nonvolatile(dc_mr, true);
+		memory_region_set_enabled(dc_mr, true);
+		host_memory_backend_set_mapped(ct3d->dc.host_dc, true);
+		if (ds->id) {
+			dc_name = g_strdup_printf("cxl-dcd-dpa-dc-space:%s", ds->id);
+		} else {
+			dc_name = g_strdup("cxl-dcd-dpa-dc-space");
+		}
+		address_space_init(&ct3d->dc.host_dc_as, dc_mr, dc_name);
+
+		if (cxl_create_toy_regions(ct3d)) {
+			return false;
+		}
+
+		for (i = 0; i < ct3d->dc.num_regions; i++) {
+			total_region_size += ct3d->dc.regions[i].len;
+		}
+		/* Make sure the host backend is large enough to cover all dc range */
+		assert(total_region_size <= dc_mr->size);
+		assert(dc_mr->size % (256*1024*1024) == 0);
+
+		ct3d->dc.total_dynamic_capicity = total_region_size;
+		g_free(dc_name);
+	}
 
     return true;
 }
@@ -890,6 +979,10 @@  err_release_cdat:
 err_free_special_ops:
     g_free(regs->special_ops);
 err_address_space_free:
+	if (ct3d->dc.host_dc) {
+		cxl_destroy_toy_regions(ct3d);
+		address_space_destroy(&ct3d->dc.host_dc_as);
+	}
     if (ct3d->hostpmem) {
         address_space_destroy(&ct3d->hostpmem_as);
     }
@@ -909,6 +1002,10 @@  static void ct3_exit(PCIDevice *pci_dev)
     cxl_doe_cdat_release(cxl_cstate);
     spdm_sock_fini(ct3d->doe_spdm.socket);
     g_free(regs->special_ops);
+	if (ct3d->dc.host_dc) {
+		cxl_destroy_toy_regions(ct3d);
+		address_space_destroy(&ct3d->dc.host_dc_as);
+	}
     if (ct3d->hostpmem) {
         address_space_destroy(&ct3d->hostpmem_as);
     }
@@ -917,6 +1014,100 @@  static void ct3_exit(PCIDevice *pci_dev)
     }
 }
 
+static void set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+		uint64_t len)
+{
+	int i;
+	CXLDCD_Region *region = NULL;
+
+	if (dpa < ct3d->dc.regions[0].base
+		|| dpa >= ct3d->dc.regions[0].base + ct3d->dc.total_dynamic_capicity)
+		return;
+
+	/*
+	 * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
+	 * Region 0 being used for the lowest DPA of Dynamic Capacity and
+	 * Region 7 for the highest DPA.
+	 * So we check from the last region to find where the dpa belongs.
+	 * access across multiple regions is not allowed.
+	 **/
+	for (i = ct3d->dc.num_regions-1; i >= 0; i--) {
+		region = &ct3d->dc.regions[i];
+		if (dpa >= region->base)
+			break;
+	}
+
+	bitmap_set(region->blk_bitmap, (dpa-region->base)/region->block_size,
+			len/region->block_size);
+}
+
+static bool test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+		uint64_t len)
+{
+	int i;
+	CXLDCD_Region *region = NULL;
+	uint64_t nbits;
+	long nr;
+
+	if (dpa < ct3d->dc.regions[0].base
+		   || dpa >= ct3d->dc.regions[0].base + ct3d->dc.total_dynamic_capicity)
+		return false;
+
+	/*
+	 * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
+	 * Region 0 being used for the lowest DPA of Dynamic Capacity and
+	 * Region 7 for the highest DPA.
+	 * So we check from the last region to find where the dpa belongs.
+	 * access across multiple regions is not allowed.
+	 **/
+	for (i = ct3d->dc.num_regions-1; i >= 0; i--) {
+		region = &ct3d->dc.regions[i];
+		if (dpa >= region->base)
+			break;
+	}
+
+	nr = (dpa-region->base)/region->block_size;
+	nbits = (len + region->block_size-1)/region->block_size;
+	if (find_next_zero_bit(region->blk_bitmap, nr+nbits, nr)
+			>= nr+nbits)
+		return true;
+
+	return false;
+}
+
+static void clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+		uint64_t len)
+{
+	int i;
+	CXLDCD_Region *region = NULL;
+	uint64_t nbits;
+	long nr;
+
+	if (dpa < ct3d->dc.regions[0].base
+		|| dpa >= ct3d->dc.regions[0].base + ct3d->dc.total_dynamic_capicity)
+		return;
+
+	/*
+	 * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
+	 * Region 0 being used for the lowest DPA of Dynamic Capacity and
+	 * Region 7 for the highest DPA.
+	 * So we check from the last region to find where the dpa belongs.
+	 * access across multiple regions is not allowed.
+	 **/
+	for (i = ct3d->dc.num_regions-1; i >= 0; i--) {
+		region = &ct3d->dc.regions[i];
+		if (dpa >= region->base)
+			break;
+	}
+
+	nr = (dpa-region->base) / region->block_size;
+	nbits = (len + region->block_size-1) / region->block_size;
+	for (i = 0; i < nbits; i++) {
+		clear_bit(nr, region->blk_bitmap);
+		nr++;
+	}
+}
+
 static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
 {
     uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
@@ -973,16 +1164,24 @@  static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
                                        AddressSpace **as,
                                        uint64_t *dpa_offset)
 {
-    MemoryRegion *vmr = NULL, *pmr = NULL;
+	MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL;
+	uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0;
 
     if (ct3d->hostvmem) {
         vmr = host_memory_backend_get_memory(ct3d->hostvmem);
+		vmr_size = int128_get64(vmr->size);
     }
     if (ct3d->hostpmem) {
         pmr = host_memory_backend_get_memory(ct3d->hostpmem);
+		pmr_size = int128_get64(pmr->size);
     }
+	if (ct3d->dc.host_dc) {
+		dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
+		/* Do we want dc_size to be dc_mr->size or not?? */
+		dc_size = ct3d->dc.total_dynamic_capicity;
+	}
 
-    if (!vmr && !pmr) {
+	if (!vmr && !pmr && !dc_mr) {
         return -ENODEV;
     }
 
@@ -990,20 +1189,22 @@  static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
         return -EINVAL;
     }
 
-    if (*dpa_offset > int128_get64(ct3d->cxl_dstate.mem_size)) {
+    if (*dpa_offset >= vmr_size + pmr_size + dc_size ||
+       (*dpa_offset >= vmr_size + pmr_size && ct3d->dc.num_regions == 0)) {
         return -EINVAL;
     }
 
-    if (vmr) {
-        if (*dpa_offset < int128_get64(vmr->size)) {
-            *as = &ct3d->hostvmem_as;
-        } else {
-            *as = &ct3d->hostpmem_as;
-            *dpa_offset -= vmr->size;
-        }
-    } else {
-        *as = &ct3d->hostpmem_as;
-    }
+	if (*dpa_offset < vmr_size)
+		*as = &ct3d->hostvmem_as;
+	else if (*dpa_offset < vmr_size + pmr_size) {
+		*as = &ct3d->hostpmem_as;
+		*dpa_offset -= vmr_size;
+	} else {
+		if (!test_region_block_backed(ct3d, *dpa_offset, size))
+			return -ENODEV;
+		*as = &ct3d->dc.host_dc_as;
+		*dpa_offset -= (vmr_size + pmr_size);
+	}
 
     return 0;
 }
@@ -1069,6 +1270,8 @@  static Property ct3_props[] = {
     DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
     DEFINE_PROP_UINT16("spdm", CXLType3Dev, spdm_port, 0),
 	DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0),
+	DEFINE_PROP_LINK("dc-memdev", CXLType3Dev, dc.host_dc,
+					TYPE_MEMORY_BACKEND, HostMemoryBackend *),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -1135,34 +1338,41 @@  static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size,
 
 static bool set_cacheline(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t *data)
 {
-    MemoryRegion *vmr = NULL, *pmr = NULL;
+	MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL;
     AddressSpace *as;
+	uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0;
 
     if (ct3d->hostvmem) {
         vmr = host_memory_backend_get_memory(ct3d->hostvmem);
+		vmr_size = int128_get64(vmr->size);
     }
     if (ct3d->hostpmem) {
         pmr = host_memory_backend_get_memory(ct3d->hostpmem);
+		pmr_size = int128_get64(pmr->size);
     }
+	if (ct3d->dc.host_dc) {
+		dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
+		dc_size = ct3d->dc.total_dynamic_capicity;
+	}
 
-    if (!vmr && !pmr) {
+    if (!vmr && !pmr && !dc_mr) {
         return false;
     }
 
-    if (dpa_offset + 64 > int128_get64(ct3d->cxl_dstate.mem_size)) {
-        return false;
-    }
+	if (dpa_offset >= vmr_size + pmr_size + dc_size)
+		return false;
+	if (dpa_offset + 64 >= vmr_size + pmr_size && ct3d->dc.num_regions == 0)
+		return false;
 
-    if (vmr) {
-        if (dpa_offset <= int128_get64(vmr->size)) {
-            as = &ct3d->hostvmem_as;
-        } else {
-            as = &ct3d->hostpmem_as;
-            dpa_offset -= vmr->size;
-        }
-    } else {
-        as = &ct3d->hostpmem_as;
-    }
+	if (dpa_offset < vmr_size) {
+		as = &ct3d->hostvmem_as;
+	} else if (dpa_offset < vmr_size + pmr_size) {
+		as = &ct3d->hostpmem_as;
+		dpa_offset -= vmr->size;
+	} else {
+		as = &ct3d->dc.host_dc_as;
+		dpa_offset -= (vmr_size + pmr_size);
+	}
 
     address_space_write(as, dpa_offset, MEMTXATTRS_UNSPECIFIED, &data, 64);
     return true;
@@ -1711,6 +1921,14 @@  static void qmp_cxl_process_dynamic_capacity_event(const char *path, CxlEventLog
 		memcpy(&dCap.dynamic_capacity_extent, &extents[i]
 				, sizeof(CXLDCExtent_raw));
 
+		if (dCap.type == 0x0)
+			set_region_block_backed(dcd, extents[i].start_dpa, extents[i].len);
+		else if (dCap.type == 0x1)
+			clear_region_block_backed(dcd, extents[i].start_dpa,
+					extents[i].len);
+		else
+			error_setg(errp, "DC event not support yet, no bitmap op");
+
 		if (cxl_event_insert(cxlds, CXL_EVENT_TYPE_DYNAMIC_CAP,
 					(CXLEventRecordRaw *)&dCap)) {
 			;
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index c0c8fcc24b..d9b6776e2c 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -211,7 +211,7 @@  typedef struct cxl_device_state {
     } timestamp;
 
     /* memory region size, HDM */
-    uint64_t mem_size;
+	uint64_t static_mem_size;
     uint64_t pmem_size;
     uint64_t vmem_size;
 
@@ -412,6 +412,7 @@  typedef struct CXLDCD_Region {
 	uint64_t block_size;
 	uint32_t dsmadhandle;
 	uint8_t flags;
+	unsigned long *blk_bitmap;
 } CXLDCD_Region;
 
 struct CXLType3Dev {
@@ -447,12 +448,17 @@  struct CXLType3Dev {
     uint64_t poison_list_overflow_ts;
 
 	struct dynamic_capacity {
+		HostMemoryBackend *host_dc;
+		AddressSpace host_dc_as;
+
+		uint8_t num_hosts; //Table 7-55
 		uint8_t num_regions; // 1-8
 		struct CXLDCD_Region regions[DCD_MAX_REGION_NUM];
 		CXLDCDExtentList extents;
 
 		uint32_t total_extent_count;
 		uint32_t ext_list_gen_seq;
+		uint64_t total_dynamic_capicity; // 256M aligned
 	} dc;
 };