diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 45deda18ed322ef60af2ddb5bdebf8ec6a8b2448..02cc2c38b44baf8ef37c3dd9c4b4d7806181e3bb 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -101,25 +101,40 @@ static int map_hdm_decoder_regs(struct cxl_port *port, void __iomem *crb,
 				      BIT(CXL_CM_CAP_CAP_ID_HDM));
 }
 
-static struct cxl_hdm *devm_cxl_setup_emulated_hdm(struct cxl_port *port,
-						   struct cxl_endpoint_dvsec_info *info)
+static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
 {
-	struct device *dev = &port->dev;
 	struct cxl_hdm *cxlhdm;
+	void __iomem *hdm;
+	u32 ctrl;
+	int i;
 
-	if (!info->mem_enabled)
-		return ERR_PTR(-ENODEV);
+	if (!info)
+		return false;
 
-	cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL);
-	if (!cxlhdm)
-		return ERR_PTR(-ENOMEM);
+	cxlhdm = dev_get_drvdata(&info->port->dev);
+	hdm = cxlhdm->regs.hdm_decoder;
 
-	cxlhdm->port = port;
-	cxlhdm->decoder_count = info->ranges;
-	cxlhdm->target_count = info->ranges;
-	dev_set_drvdata(&port->dev, cxlhdm);
+	if (!hdm)
+		return true;
 
-	return cxlhdm;
+	/*
+	 * If HDM decoders are present and the driver is in control of
+	 * Mem_Enable, skip DVSEC-based emulation.
+	 */
+	if (!info->mem_enabled)
+		return false;
+
+	/*
+	 * If any decoders are committed already, there should not be any
+	 * emulated DVSEC decoders.
+	 */
+	for (i = 0; i < cxlhdm->decoder_count; i++) {
+		ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i));
+		if (FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl))
+			return false;
+	}
+
+	return true;
 }
 
 /**
@@ -138,13 +153,14 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
 	cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL);
 	if (!cxlhdm)
 		return ERR_PTR(-ENOMEM);
-
 	cxlhdm->port = port;
-	crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE);
-	if (!crb) {
-		if (info && info->mem_enabled)
-			return devm_cxl_setup_emulated_hdm(port, info);
+	dev_set_drvdata(dev, cxlhdm);
 
+	crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE);
+	if (!crb && info && info->mem_enabled) {
+		cxlhdm->decoder_count = info->ranges;
+		return cxlhdm;
+	} else if (!crb) {
 		dev_err(dev, "No component registers mapped\n");
 		return ERR_PTR(-ENXIO);
 	}
@@ -160,7 +176,15 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
 		return ERR_PTR(-ENXIO);
 	}
 
-	dev_set_drvdata(dev, cxlhdm);
+	/*
+	 * Now that the hdm capability is parsed, decide if range
+	 * register emulation is needed and fixup cxlhdm accordingly.
+	 */
+	if (should_emulate_decoders(info)) {
+		dev_dbg(dev, "Fallback map %d range register%s\n", info->ranges,
+			info->ranges > 1 ? "s" : "");
+		cxlhdm->decoder_count = info->ranges;
+	}
 
 	return cxlhdm;
 }
@@ -714,14 +738,20 @@ static int cxl_decoder_reset(struct cxl_decoder *cxld)
 	return 0;
 }
 
-static int cxl_setup_hdm_decoder_from_dvsec(struct cxl_port *port,
-					    struct cxl_decoder *cxld, int which,
-					    struct cxl_endpoint_dvsec_info *info)
+static int cxl_setup_hdm_decoder_from_dvsec(
+	struct cxl_port *port, struct cxl_decoder *cxld, u64 *dpa_base,
+	int which, struct cxl_endpoint_dvsec_info *info)
 {
+	struct cxl_endpoint_decoder *cxled;
+	u64 len;
+	int rc;
+
 	if (!is_cxl_endpoint(port))
 		return -EOPNOTSUPP;
 
-	if (!range_len(&info->dvsec_range[which]))
+	cxled = to_cxl_endpoint_decoder(&cxld->dev);
+	len = range_len(&info->dvsec_range[which]);
+	if (!len)
 		return -ENOENT;
 
 	cxld->target_type = CXL_DECODER_EXPANDER;
@@ -736,40 +766,24 @@ static int cxl_setup_hdm_decoder_from_dvsec(struct cxl_port *port,
 	cxld->flags |= CXL_DECODER_F_ENABLE | CXL_DECODER_F_LOCK;
 	port->commit_end = cxld->id;
 
-	return 0;
-}
-
-static bool should_emulate_decoders(struct cxl_port *port)
-{
-	struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
-	void __iomem *hdm = cxlhdm->regs.hdm_decoder;
-	u32 ctrl;
-	int i;
-
-	if (!is_cxl_endpoint(cxlhdm->port))
-		return false;
-
-	if (!hdm)
-		return true;
-
-	/*
-	 * If any decoders are committed already, there should not be any
-	 * emulated DVSEC decoders.
-	 */
-	for (i = 0; i < cxlhdm->decoder_count; i++) {
-		ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i));
-		if (FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl))
-			return false;
+	rc = devm_cxl_dpa_reserve(cxled, *dpa_base, len, 0);
+	if (rc) {
+		dev_err(&port->dev,
+			"decoder%d.%d: Failed to reserve DPA range %#llx - %#llx (%d)\n",
+			port->id, cxld->id, *dpa_base, *dpa_base + len - 1, rc);
+		return rc;
 	}
+	*dpa_base += len;
+	cxled->state = CXL_DECODER_STATE_AUTO;
 
-	return true;
+	return 0;
 }
 
 static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
 			    int *target_map, void __iomem *hdm, int which,
 			    u64 *dpa_base, struct cxl_endpoint_dvsec_info *info)
 {
-	struct cxl_endpoint_decoder *cxled = NULL;
+	struct cxl_endpoint_decoder *cxled;
 	u64 size, base, skip, dpa_size;
 	bool committed;
 	u32 remainder;
@@ -780,11 +794,9 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
 		unsigned char target_id[8];
 	} target_list;
 
-	if (should_emulate_decoders(port))
-		return cxl_setup_hdm_decoder_from_dvsec(port, cxld, which, info);
-
-	if (is_endpoint_decoder(&cxld->dev))
-		cxled = to_cxl_endpoint_decoder(&cxld->dev);
+	if (should_emulate_decoders(info))
+		return cxl_setup_hdm_decoder_from_dvsec(port, cxld, dpa_base,
+							which, info);
 
 	ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which));
 	base = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which));
@@ -806,9 +818,6 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
 		.end = base + size - 1,
 	};
 
-	if (cxled && !committed && range_len(&info->dvsec_range[which]))
-		return cxl_setup_hdm_decoder_from_dvsec(port, cxld, which, info);
-
 	/* decoders are enabled if committed */
 	if (committed) {
 		cxld->flags |= CXL_DECODER_F_ENABLE;
@@ -846,7 +855,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
 	if (rc)
 		return rc;
 
-	if (!cxled) {
+	if (!info) {
 		target_list.value =
 			ioread64_hi_lo(hdm + CXL_HDM_DECODER0_TL_LOW(which));
 		for (i = 0; i < cxld->interleave_ways; i++)
@@ -866,6 +875,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
 		return -ENXIO;
 	}
 	skip = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SKIP_LOW(which));
+	cxled = to_cxl_endpoint_decoder(&cxld->dev);
 	rc = devm_cxl_dpa_reserve(cxled, *dpa_base + skip, dpa_size, skip);
 	if (rc) {
 		dev_err(&port->dev,
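
The hdm.c changes above fold the old devm_cxl_setup_emulated_hdm() into devm_cxl_setup_hdm() and centralize the fallback decision in should_emulate_decoders(): emulate DVSEC range registers when no HDM decoder capability is mapped, but never when the driver owns Mem_Enable or when firmware has already committed a hardware decoder. A minimal userspace sketch of that policy, assuming a hypothetical context struct and an illustrative COMMITTED bit position:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DECODER_CTRL_COMMITTED	(1u << 10)	/* assumed bit position, for illustration */

struct emulation_ctx {
	const uint32_t *decoder_ctrl;	/* one control word per decoder, NULL if unmapped */
	int decoder_count;
	bool mem_enabled;		/* cached DVSEC Mem_Enable at init time */
};

static bool should_emulate(const struct emulation_ctx *ctx)
{
	int i;

	if (!ctx->decoder_ctrl)
		return true;	/* no HDM decoder registers: emulation is the only option */
	if (!ctx->mem_enabled)
		return false;	/* driver controls Mem_Enable: use the real decoders */
	for (i = 0; i < ctx->decoder_count; i++)
		if (ctx->decoder_ctrl[i] & DECODER_CTRL_COMMITTED)
			return false;	/* firmware already committed a decoder */
	return true;
}

int main(void)
{
	uint32_t ctrl[2] = { 0, DECODER_CTRL_COMMITTED };
	struct emulation_ctx ctx = { ctrl, 2, true };

	printf("emulate: %s\n", should_emulate(&ctx) ? "yes" : "no");	/* prints "no" */
	return 0;
}

Emulated decoders simply reuse the DVSEC range count, which is why devm_cxl_setup_hdm() now only fixes up cxlhdm->decoder_count instead of allocating a separate structure.
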
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 7328a255241138d8d2f0f963b7139a8281a82c0a..523d5b9fd7fcf2a1fb0b62c85dabac13c63b3c00 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -462,7 +462,7 @@ static struct pci_doe_mb *find_cdat_doe(struct device *uport)
 	return NULL;
 }
 
-#define CDAT_DOE_REQ(entry_handle)					\
+#define CDAT_DOE_REQ(entry_handle) cpu_to_le32				\
 	(FIELD_PREP(CXL_DOE_TABLE_ACCESS_REQ_CODE,			\
 		    CXL_DOE_TABLE_ACCESS_REQ_CODE_READ) |		\
 	 FIELD_PREP(CXL_DOE_TABLE_ACCESS_TABLE_TYPE,			\
@@ -475,8 +475,8 @@ static void cxl_doe_task_complete(struct pci_doe_task *task)
 }
 
 struct cdat_doe_task {
-	u32 request_pl;
-	u32 response_pl[32];
+	__le32 request_pl;
+	__le32 response_pl[32];
 	struct completion c;
 	struct pci_doe_task task;
 };
@@ -510,10 +510,10 @@ static int cxl_cdat_get_length(struct device *dev,
 		return rc;
 	}
 	wait_for_completion(&t.c);
-	if (t.task.rv < sizeof(u32))
+	if (t.task.rv < 2 * sizeof(__le32))
 		return -EIO;
 
-	*length = t.response_pl[1];
+	*length = le32_to_cpu(t.response_pl[1]);
 	dev_dbg(dev, "CDAT length %zu\n", *length);
 
 	return 0;
@@ -524,13 +524,13 @@ static int cxl_cdat_read_table(struct device *dev,
 			       struct cxl_cdat *cdat)
 {
 	size_t length = cdat->length;
-	u32 *data = cdat->table;
+	__le32 *data = cdat->table;
 	int entry_handle = 0;
 
 	do {
 		DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(entry_handle), t);
+		struct cdat_entry_header *entry;
 		size_t entry_dw;
-		u32 *entry;
 		int rc;
 
 		rc = pci_doe_submit_task(cdat_doe, &t.task);
@@ -539,26 +539,34 @@ static int cxl_cdat_read_table(struct device *dev,
 			return rc;
 		}
 		wait_for_completion(&t.c);
-		/* 1 DW header + 1 DW data min */
-		if (t.task.rv < (2 * sizeof(u32)))
+
+		/* 1 DW Table Access Response Header + CDAT entry */
+		entry = (struct cdat_entry_header *)(t.response_pl + 1);
+		if ((entry_handle == 0 &&
+		     t.task.rv != sizeof(__le32) + sizeof(struct cdat_header)) ||
+		    (entry_handle > 0 &&
+		     (t.task.rv < sizeof(__le32) + sizeof(*entry) ||
+		      t.task.rv != sizeof(__le32) + le16_to_cpu(entry->length))))
 			return -EIO;
 
 		/* Get the CXL table access header entry handle */
 		entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE,
-					 t.response_pl[0]);
-		entry = t.response_pl + 1;
-		entry_dw = t.task.rv / sizeof(u32);
+					 le32_to_cpu(t.response_pl[0]));
+		entry_dw = t.task.rv / sizeof(__le32);
 		/* Skip Header */
 		entry_dw -= 1;
-		entry_dw = min(length / sizeof(u32), entry_dw);
+		entry_dw = min(length / sizeof(__le32), entry_dw);
 		/* Prevent length < 1 DW from causing a buffer overflow */
 		if (entry_dw) {
-			memcpy(data, entry, entry_dw * sizeof(u32));
-			length -= entry_dw * sizeof(u32);
+			memcpy(data, entry, entry_dw * sizeof(__le32));
+			length -= entry_dw * sizeof(__le32);
 			data += entry_dw;
 		}
 	} while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY);
 
+	/* Length in CDAT header may exceed concatenation of CDAT entries */
+	cdat->length -= length;
+
 	return 0;
 }
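
The CDAT read path above now validates each DOE Table Access response against the expected object size: the response for entry handle 0 must be exactly one response-header DW plus the CDAT header, and every later response must match the length advertised in the entry's own header, both decoded with le32_to_cpu()/le16_to_cpu(). A self-contained userspace sketch of that check and of walking an assembled table, using glibc le16toh()/htole16() as stand-ins; the struct layouts mirror the ones added to cxlpci.h, everything else is illustrative:

#include <endian.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct cdat_header {
	uint32_t length;		/* little-endian on the wire */
	uint8_t revision;
	uint8_t checksum;
	uint8_t reserved[6];
	uint32_t sequence;
} __attribute__((packed));

struct cdat_entry_header {
	uint8_t type;
	uint8_t reserved;
	uint16_t length;		/* little-endian on the wire */
} __attribute__((packed));

/* @rv is the DOE response size in bytes, @entry points just past the
 * 1 DW Table Access response header. */
static int cdat_response_ok(int entry_handle, size_t rv,
			    const struct cdat_entry_header *entry)
{
	if (entry_handle == 0)
		return rv == sizeof(uint32_t) + sizeof(struct cdat_header);
	return rv >= sizeof(uint32_t) + sizeof(*entry) &&
	       rv == sizeof(uint32_t) + le16toh(entry->length);
}

/* Walk a fully assembled CDAT image: fixed header, then variable-length entries. */
static void walk_cdat(const uint8_t *buf, size_t len)
{
	size_t off = sizeof(struct cdat_header);

	while (off + sizeof(struct cdat_entry_header) <= len) {
		const struct cdat_entry_header *e = (const void *)(buf + off);
		uint16_t elen = le16toh(e->length);

		if (elen < sizeof(*e) || off + elen > len)
			break;			/* malformed entry, stop */
		printf("CDAT entry type %u, %u bytes\n", e->type, elen);
		off += elen;
	}
}

int main(void)
{
	uint8_t img[sizeof(struct cdat_header) + sizeof(struct cdat_entry_header)] = { 0 };
	struct cdat_entry_header *e =
		(struct cdat_entry_header *)(img + sizeof(struct cdat_header));

	e->length = htole16(sizeof(*e));
	printf("response ok: %d\n",
	       cdat_response_ok(1, sizeof(uint32_t) + sizeof(*e), e));
	walk_cdat(img, sizeof(img));
	return 0;
}

Trimming cdat->length by the residual @length at the end of cxl_cdat_read_table() makes the cached image length reflect the entries actually copied, since the length in the CDAT header may exceed their concatenation.
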
 
diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c
index c2e4b10937884a7ec3cc1266f395ce2dedcbe209..f8c38d9972522a605e9bf94f436343f62df0e64f 100644
--- a/drivers/cxl/core/pmem.c
+++ b/drivers/cxl/core/pmem.c
@@ -62,9 +62,9 @@ static int match_nvdimm_bridge(struct device *dev, void *data)
 	return is_cxl_nvdimm_bridge(dev);
 }
 
-struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *start)
+struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd)
 {
-	struct cxl_port *port = find_cxl_root(start);
+	struct cxl_port *port = find_cxl_root(dev_get_drvdata(&cxlmd->dev));
 	struct device *dev;
 
 	if (!port)
@@ -253,7 +253,7 @@ int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd)
 	struct device *dev;
 	int rc;
 
-	cxl_nvb = cxl_find_nvdimm_bridge(&cxlmd->dev);
+	cxl_nvb = cxl_find_nvdimm_bridge(cxlmd);
 	if (!cxl_nvb)
 		return -ENODEV;
 
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 8ee6b6e2e2a4edd698e6812e75fa68902ff833fe..4d1f9c5b5029a594835b0ed251fe229648cc4853 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -823,41 +823,17 @@ static bool dev_is_cxl_root_child(struct device *dev)
 	return false;
 }
 
-/* Find a 2nd level CXL port that has a dport that is an ancestor of @match */
-static int match_root_child(struct device *dev, const void *match)
+struct cxl_port *find_cxl_root(struct cxl_port *port)
 {
-	const struct device *iter = NULL;
-	struct cxl_dport *dport;
-	struct cxl_port *port;
-
-	if (!dev_is_cxl_root_child(dev))
-		return 0;
-
-	port = to_cxl_port(dev);
-	iter = match;
-	while (iter) {
-		dport = cxl_find_dport_by_dev(port, iter);
-		if (dport)
-			break;
-		iter = iter->parent;
-	}
-
-	return !!iter;
-}
+	struct cxl_port *iter = port;
 
-struct cxl_port *find_cxl_root(struct device *dev)
-{
-	struct device *port_dev;
-	struct cxl_port *root;
+	while (iter && !is_cxl_root(iter))
+		iter = to_cxl_port(iter->dev.parent);
 
-	port_dev = bus_find_device(&cxl_bus_type, NULL, dev, match_root_child);
-	if (!port_dev)
+	if (!iter)
 		return NULL;
-
-	root = to_cxl_port(port_dev->parent);
-	get_device(&root->dev);
-	put_device(port_dev);
-	return root;
+	get_device(&iter->dev);
+	return iter;
 }
 EXPORT_SYMBOL_NS_GPL(find_cxl_root, CXL);
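
find_cxl_root() above no longer does a bus-wide search for a second-level port whose dport is an ancestor of an arbitrary device; it simply walks the cxl_port parentage of the port it is handed until it reaches the root, then takes a reference. A hedged userspace sketch of that walk, with a hypothetical node type standing in for struct cxl_port:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-in for struct cxl_port, for illustration only */
struct port {
	struct port *parent;	/* NULL above the root */
	bool is_root;
	const char *name;
};

/* Walk up until the root port (or NULL); the real function additionally
 * pins the result with get_device() before returning it. */
static struct port *find_root(struct port *port)
{
	struct port *iter = port;

	while (iter && !iter->is_root)
		iter = iter->parent;
	return iter;
}

int main(void)
{
	struct port root = { NULL, true, "root0" };
	struct port sw = { &root, false, "port1" };
	struct port ep = { &sw, false, "endpoint2" };
	struct port *found = find_root(&ep);

	printf("root: %s\n", found ? found->name : "none");
	return 0;
}
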
 
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index f29028148806baa2982eaf5b3a9e92840f8bff92..b2fd67fcebfb5742ce2095d062b7201075e4b10f 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -134,9 +134,13 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
 		struct cxl_endpoint_decoder *cxled = p->targets[i];
 		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
 		struct cxl_port *iter = cxled_to_port(cxled);
+		struct cxl_dev_state *cxlds = cxlmd->cxlds;
 		struct cxl_ep *ep;
 		int rc = 0;
 
+		if (cxlds->rcd)
+			goto endpoint_reset;
+
 		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
 			iter = to_cxl_port(iter->dev.parent);
 
@@ -153,6 +157,7 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
 				return rc;
 		}
 
+endpoint_reset:
 		rc = cxled->cxld.reset(&cxled->cxld);
 		if (rc)
 			return rc;
@@ -1199,6 +1204,7 @@ static void cxl_region_teardown_targets(struct cxl_region *cxlr)
 {
 	struct cxl_region_params *p = &cxlr->params;
 	struct cxl_endpoint_decoder *cxled;
+	struct cxl_dev_state *cxlds;
 	struct cxl_memdev *cxlmd;
 	struct cxl_port *iter;
 	struct cxl_ep *ep;
@@ -1214,6 +1220,10 @@ static void cxl_region_teardown_targets(struct cxl_region *cxlr)
 	for (i = 0; i < p->nr_targets; i++) {
 		cxled = p->targets[i];
 		cxlmd = cxled_to_memdev(cxled);
+		cxlds = cxlmd->cxlds;
+
+		if (cxlds->rcd)
+			continue;
 
 		iter = cxled_to_port(cxled);
 		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
@@ -1229,14 +1239,24 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr)
 {
 	struct cxl_region_params *p = &cxlr->params;
 	struct cxl_endpoint_decoder *cxled;
+	struct cxl_dev_state *cxlds;
+	int i, rc, rch = 0, vh = 0;
 	struct cxl_memdev *cxlmd;
 	struct cxl_port *iter;
 	struct cxl_ep *ep;
-	int i, rc;
 
 	for (i = 0; i < p->nr_targets; i++) {
 		cxled = p->targets[i];
 		cxlmd = cxled_to_memdev(cxled);
+		cxlds = cxlmd->cxlds;
+
+		/* validate that all targets agree on topology */
+		if (!cxlds->rcd) {
+			vh++;
+		} else {
+			rch++;
+			continue;
+		}
 
 		iter = cxled_to_port(cxled);
 		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
@@ -1256,6 +1276,12 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr)
 		}
 	}
 
+	if (rch && vh) {
+		dev_err(&cxlr->dev, "mismatched CXL topologies detected\n");
+		cxl_region_teardown_targets(cxlr);
+		return -ENXIO;
+	}
+
 	return 0;
 }
 
@@ -1648,6 +1674,7 @@ static int cxl_region_attach(struct cxl_region *cxlr,
 		if (rc)
 			goto err_decrement;
 		p->state = CXL_CONFIG_ACTIVE;
+		set_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags);
 	}
 
 	cxled->cxld.interleave_ways = p->interleave_ways;
@@ -1749,8 +1776,6 @@ static int attach_target(struct cxl_region *cxlr,
 
 	down_read(&cxl_dpa_rwsem);
 	rc = cxl_region_attach(cxlr, cxled, pos);
-	if (rc == 0)
-		set_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags);
 	up_read(&cxl_dpa_rwsem);
 	up_write(&cxl_region_rwsem);
 	return rc;
@@ -2251,7 +2276,7 @@ static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
 		 * bridge for one device is the same for all.
 		 */
 		if (i == 0) {
-			cxl_nvb = cxl_find_nvdimm_bridge(&cxlmd->dev);
+			cxl_nvb = cxl_find_nvdimm_bridge(cxlmd);
 			if (!cxl_nvb) {
 				cxlr_pmem = ERR_PTR(-ENODEV);
 				goto out;
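
cxl_region_setup_targets() above now tallies whether each endpoint sits behind a Restricted CXL Host (cxlds->rcd) or a VH topology and refuses to assemble a region that mixes the two; RCH endpoints also skip switch-port programming, and the reset/teardown paths jump straight to the endpoint decoder for them. A small sketch of the tally, using a hypothetical per-target descriptor:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical per-target descriptor for this sketch only */
struct target {
	bool rcd;	/* true when the memdev is a Restricted CXL Device */
};

/* All targets of a region must agree on topology: either all RCH or all VH. */
static int check_topology(const struct target *targets, int nr_targets)
{
	int i, rch = 0, vh = 0;

	for (i = 0; i < nr_targets; i++) {
		if (targets[i].rcd)
			rch++;
		else
			vh++;
	}
	if (rch && vh) {
		fprintf(stderr, "mismatched CXL topologies detected\n");
		return -1;
	}
	return 0;
}

int main(void)
{
	struct target mixed[2] = { { .rcd = true }, { .rcd = false } };

	printf("mixed region rc: %d\n", check_topology(mixed, 2));	/* prints -1 */
	return 0;
}

Moving the CXL_REGION_F_INCOHERENT set into cxl_region_attach() places the flag next to the transition to CXL_CONFIG_ACTIVE, so it is raised when the region goes active rather than on every successful attach_target() call.
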
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index f2b0962a552d582cd190f028cc54f76cfbc9cffb..044a92d9813e239d1b97e7a4935f36850c30e154 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -658,7 +658,7 @@ struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port);
 struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
 				   resource_size_t component_reg_phys,
 				   struct cxl_dport *parent_dport);
-struct cxl_port *find_cxl_root(struct device *dev);
+struct cxl_port *find_cxl_root(struct cxl_port *port);
 int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd);
 void cxl_bus_rescan(void);
 void cxl_bus_drain(void);
@@ -695,13 +695,15 @@ int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
 
 /**
  * struct cxl_endpoint_dvsec_info - Cached DVSEC info
- * @mem_enabled: cached value of mem_enabled in the DVSEC, PCIE_DEVICE
+ * @mem_enabled: cached value of mem_enabled in the DVSEC at init time
  * @ranges: Number of active HDM ranges this device uses.
+ * @port: endpoint port associated with this info instance
  * @dvsec_range: cached attributes of the ranges in the DVSEC, PCIE_DEVICE
  */
 struct cxl_endpoint_dvsec_info {
 	bool mem_enabled;
 	int ranges;
+	struct cxl_port *port;
 	struct range dvsec_range[2];
 };
 
@@ -758,7 +760,7 @@ struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev);
 bool is_cxl_nvdimm(struct device *dev);
 bool is_cxl_nvdimm_bridge(struct device *dev);
 int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd);
-struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *dev);
+struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd);
 
 #ifdef CONFIG_CXL_REGION
 bool is_cxl_pmem_region(struct device *dev);
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index be6a2ef3cce3738a45cc19d8c9cb54aff93af693..0465ef963cd6a0b23dae3cfb7177d9a033dcde18 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -68,6 +68,20 @@ enum cxl_regloc_type {
 	CXL_REGLOC_RBI_TYPES
 };
 
+struct cdat_header {
+	__le32 length;
+	u8 revision;
+	u8 checksum;
+	u8 reserved[6];
+	__le32 sequence;
+} __packed;
+
+struct cdat_entry_header {
+	u8 type;
+	u8 reserved;
+	__le16 length;
+} __packed;
+
 int devm_cxl_port_enumerate_dports(struct cxl_port *port);
 struct cxl_dev_state;
 int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index 1049bb5ea496129177e3db06fae3b64198548483..22a7ab2bae7c7e53878faf3b372718f50ea1457f 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -78,8 +78,8 @@ static int cxl_switch_port_probe(struct cxl_port *port)
 
 static int cxl_endpoint_port_probe(struct cxl_port *port)
 {
+	struct cxl_endpoint_dvsec_info info = { .port = port };
 	struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport);
-	struct cxl_endpoint_dvsec_info info = { 0 };
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
 	struct cxl_hdm *cxlhdm;
 	struct cxl_port *root;
@@ -119,7 +119,7 @@ static int cxl_endpoint_port_probe(struct cxl_port *port)
 	 * This can't fail in practice as CXL root exit unregisters all
 	 * descendant ports and that in turn synchronizes with cxl_port_probe()
 	 */
-	root = find_cxl_root(&cxlmd->dev);
+	root = find_cxl_root(port);
 
 	/*
 	 * Now that all endpoint decoders are successfully enumerated, try to
diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c
index 66d9ab2886468d02a32eb40353ad5bb24909f3ac..e5e9b287b9766e3c164bb2c2a6c1bcca72a59c96 100644
--- a/drivers/pci/doe.c
+++ b/drivers/pci/doe.c
@@ -128,7 +128,7 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb,
 		return -EIO;
 
 	/* Length is 2 DW of header + length of payload in DW */
-	length = 2 + task->request_pl_sz / sizeof(u32);
+	length = 2 + task->request_pl_sz / sizeof(__le32);
 	if (length > PCI_DOE_MAX_LENGTH)
 		return -EIO;
 	if (length == PCI_DOE_MAX_LENGTH)
@@ -141,9 +141,9 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb,
 	pci_write_config_dword(pdev, offset + PCI_DOE_WRITE,
 			       FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH,
 					  length));
-	for (i = 0; i < task->request_pl_sz / sizeof(u32); i++)
+	for (i = 0; i < task->request_pl_sz / sizeof(__le32); i++)
 		pci_write_config_dword(pdev, offset + PCI_DOE_WRITE,
-				       task->request_pl[i]);
+				       le32_to_cpu(task->request_pl[i]));
 
 	pci_doe_write_ctrl(doe_mb, PCI_DOE_CTRL_GO);
 
@@ -195,11 +195,11 @@ static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *tas
 
 	/* First 2 dwords have already been read */
 	length -= 2;
-	payload_length = min(length, task->response_pl_sz / sizeof(u32));
+	payload_length = min(length, task->response_pl_sz / sizeof(__le32));
 	/* Read the rest of the response payload */
 	for (i = 0; i < payload_length; i++) {
-		pci_read_config_dword(pdev, offset + PCI_DOE_READ,
-				      &task->response_pl[i]);
+		pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val);
+		task->response_pl[i] = cpu_to_le32(val);
 		/* Prior to the last ack, ensure Data Object Ready */
 		if (i == (payload_length - 1) && !pci_doe_data_obj_ready(doe_mb))
 			return -EIO;
@@ -217,13 +217,14 @@ static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *tas
 	if (FIELD_GET(PCI_DOE_STATUS_ERROR, val))
 		return -EIO;
 
-	return min(length, task->response_pl_sz / sizeof(u32)) * sizeof(u32);
+	return min(length, task->response_pl_sz / sizeof(__le32)) * sizeof(__le32);
 }
 
 static void signal_task_complete(struct pci_doe_task *task, int rv)
 {
 	task->rv = rv;
 	task->complete(task);
+	destroy_work_on_stack(&task->work);
 }
 
 static void signal_task_abort(struct pci_doe_task *task, int rv)
@@ -317,14 +318,16 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
 {
 	u32 request_pl = FIELD_PREP(PCI_DOE_DATA_OBJECT_DISC_REQ_3_INDEX,
 				    *index);
+	__le32 request_pl_le = cpu_to_le32(request_pl);
+	__le32 response_pl_le;
 	u32 response_pl;
 	DECLARE_COMPLETION_ONSTACK(c);
 	struct pci_doe_task task = {
 		.prot.vid = PCI_VENDOR_ID_PCI_SIG,
 		.prot.type = PCI_DOE_PROTOCOL_DISCOVERY,
-		.request_pl = &request_pl,
+		.request_pl = &request_pl_le,
 		.request_pl_sz = sizeof(request_pl),
-		.response_pl = &response_pl,
+		.response_pl = &response_pl_le,
 		.response_pl_sz = sizeof(response_pl),
 		.complete = pci_doe_task_complete,
 		.private = &c,
@@ -340,6 +343,7 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
 	if (task.rv != sizeof(response_pl))
 		return -EIO;
 
+	response_pl = le32_to_cpu(response_pl_le);
 	*vid = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID, response_pl);
 	*protocol = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL,
 			      response_pl);
@@ -520,6 +524,8 @@ EXPORT_SYMBOL_GPL(pci_doe_supports_prot);
  * task->complete will be called when the state machine is done processing this
  * task.
  *
+ * @task must be allocated on the stack.
+ *
  * Excess data will be discarded.
  *
  * RETURNS: 0 when task has been successfully queued, -ERRNO on error
@@ -533,15 +539,15 @@ int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task)
 	 * DOE requests must be a whole number of DW and the response needs to
 	 * be big enough for at least 1 DW
 	 */
-	if (task->request_pl_sz % sizeof(u32) ||
-	    task->response_pl_sz < sizeof(u32))
+	if (task->request_pl_sz % sizeof(__le32) ||
+	    task->response_pl_sz < sizeof(__le32))
 		return -EINVAL;
 
 	if (test_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags))
 		return -EIO;
 
 	task->doe_mb = doe_mb;
-	INIT_WORK(&task->work, doe_statemachine_work);
+	INIT_WORK_ONSTACK(&task->work, doe_statemachine_work);
 	queue_work(doe_mb->work_queue, &task->work);
 	return 0;
 }
diff --git a/include/linux/pci-doe.h b/include/linux/pci-doe.h
index ed9b4df792b8858be22221ec77f24abafce9cc75..43765eaf2342c6996db747a14d9dcef89d6993e8 100644
--- a/include/linux/pci-doe.h
+++ b/include/linux/pci-doe.h
@@ -34,6 +34,10 @@ struct pci_doe_mb;
  * @work: Used internally by the mailbox
  * @doe_mb: Used internally by the mailbox
  *
+ * Payloads are treated as opaque byte streams which are transmitted verbatim,
+ * without byte-swapping.  If payloads contain little-endian register values,
+ * the caller is responsible for conversion with cpu_to_le32() / le32_to_cpu().
+ *
  * The payload sizes and rv are specified in bytes with the following
  * restrictions concerning the protocol.
  *
@@ -45,9 +49,9 @@ struct pci_doe_mb;
  */
 struct pci_doe_task {
 	struct pci_doe_protocol prot;
-	u32 *request_pl;
+	__le32 *request_pl;
 	size_t request_pl_sz;
-	u32 *response_pl;
+	__le32 *response_pl;
 	size_t response_pl_sz;
 	int rv;
 	void (*complete)(struct pci_doe_task *task);
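
With request_pl/response_pl typed as __le32, the DOE mailbox transmits payloads verbatim and byte-swapping becomes the caller's job, as the kernel-doc added above spells out; pci_doe_discovery() and the CXL CDAT code now convert with cpu_to_le32()/le32_to_cpu() around the FIELD_PREP()/FIELD_GET() accesses. A userspace sketch of that caller-side pattern, using glibc htole32()/le32toh() as analogues; the field masks below are assumptions for illustration, not the real CXL_DOE_TABLE_ACCESS_* definitions:

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative field layout only; the real masks live in cxlpci.h */
#define REQ_CODE_MASK		0x000000ffu	/* assumed */
#define TABLE_TYPE_MASK		0x0000ff00u	/* assumed */
#define ENTRY_HANDLE_MASK	0xffff0000u	/* assumed */

static uint32_t field_prep(uint32_t mask, uint32_t val)
{
	return (val << __builtin_ctz(mask)) & mask;
}

static uint32_t field_get(uint32_t mask, uint32_t reg)
{
	return (reg & mask) >> __builtin_ctz(mask);
}

int main(void)
{
	/* Build the request in CPU order, convert once before handing it to
	 * the mailbox as an opaque little-endian dword. */
	uint32_t req = field_prep(REQ_CODE_MASK, 0) |		/* read */
		       field_prep(TABLE_TYPE_MASK, 0) |		/* CDAT */
		       field_prep(ENTRY_HANDLE_MASK, 3);
	uint32_t request_pl_le = htole32(req);

	/* Pretend the mailbox echoed the dword back; convert before decoding. */
	uint32_t response = le32toh(request_pl_le);

	printf("entry handle: %u\n", field_get(ENTRY_HANDLE_MASK, response));
	return 0;
}
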