
Ultimate access to all questions.
You have a Delta Lake table named customer_purchases with duplicate records based on customer_id. Your goal is to deduplicate these records by retaining only the latest purchase record for each customer, determined by purchase_date. How would you use the MERGE statement in Databricks SQL to accomplish this task?
A
-- Option A: self-merge. customer_purchases is both the merge target and the
-- merge source, paired on customer_id. A target row is deleted when a paired
-- source row carries a strictly later purchase_date.
-- NOTE(review): the source is not reduced to one row per customer_id, so
-- several source rows can match the same target row; confirm how Delta Lake
-- MERGE handles multiple matches with a conditional DELETE clause.
-- The statement is listed twice verbatim in this option.
MERGE INTO customer_purchases AS target
USING customer_purchases AS source
    ON target.customer_id = source.customer_id
WHEN MATCHED AND target.purchase_date < source.purchase_date
    THEN DELETE;

MERGE INTO customer_purchases AS target
USING customer_purchases AS source
    ON target.customer_id = source.customer_id
WHEN MATCHED AND target.purchase_date < source.purchase_date
    THEN DELETE;
B
-- Option B: merges the table with a subquery over itself that keeps rows whose
-- purchase_date equals the result of a grouped MAX(purchase_date) subquery.
-- Matched target rows are updated from the source and unmatched source rows
-- are inserted; no clause deletes anything.
-- NOTE(review): the inner subquery is compared with "=" but is grouped by
-- customer_id and not correlated to the outer row, so it may return more than
-- one row; confirm how the engine treats that.
-- NOTE(review): confirm "UPDATE SET target.* = source.*" against the MERGE INTO
-- syntax reference (the documented shorthand appears to be "UPDATE SET *").
-- The statement is listed twice verbatim in this option.
MERGE INTO customer_purchases AS target
USING (
    SELECT
        customer_id,
        purchase_date
    FROM customer_purchases
    WHERE purchase_date = (
        SELECT MAX(purchase_date)
        FROM customer_purchases
        GROUP BY customer_id
    )
) AS source
    ON target.customer_id = source.customer_id
WHEN MATCHED
    THEN UPDATE SET target.* = source.*
WHEN NOT MATCHED
    THEN INSERT *;

MERGE INTO customer_purchases AS target
USING (
    SELECT
        customer_id,
        purchase_date
    FROM customer_purchases
    WHERE purchase_date = (
        SELECT MAX(purchase_date)
        FROM customer_purchases
        GROUP BY customer_id
    )
) AS source
    ON target.customer_id = source.customer_id
WHEN MATCHED
    THEN UPDATE SET target.* = source.*
WHEN NOT MATCHED
    THEN INSERT *;
C
-- Option C: the source holds one row per customer_id with that customer's
-- MAX(purchase_date) as latest_purchase_date. Target rows joined on
-- customer_id are deleted when their purchase_date is strictly earlier than
-- latest_purchase_date; source rows with no matching target row are inserted.
-- Because the comparison is strict, rows that share the latest purchase_date
-- are all kept.
-- NOTE(review): the source is derived from the target itself, so the
-- NOT MATCHED branch looks reachable only for a NULL customer_id group, and
-- the source exposes latest_purchase_date rather than purchase_date; confirm
-- how INSERT * resolves columns here.
-- The statement is listed twice verbatim in this option.
MERGE INTO customer_purchases AS target
USING (
    SELECT
        customer_id,
        MAX(purchase_date) AS latest_purchase_date
    FROM customer_purchases
    GROUP BY customer_id
) AS source
    ON target.customer_id = source.customer_id
WHEN MATCHED AND target.purchase_date < source.latest_purchase_date
    THEN DELETE
WHEN NOT MATCHED
    THEN INSERT *;

MERGE INTO customer_purchases AS target
USING (
    SELECT
        customer_id,
        MAX(purchase_date) AS latest_purchase_date
    FROM customer_purchases
    GROUP BY customer_id
) AS source
    ON target.customer_id = source.customer_id
WHEN MATCHED AND target.purchase_date < source.latest_purchase_date
    THEN DELETE
WHEN NOT MATCHED
    THEN INSERT *;
D
-- Option D: the source holds one row per customer_id with that customer's
-- MAX(purchase_date) as latest_purchase_date. The date comparison is part of
-- the ON condition, so a target row matches only when it shares the
-- customer_id and has a strictly earlier purchase_date; every matched row is
-- deleted unconditionally.
-- Because the comparison is strict, rows that share the latest purchase_date
-- never match and are all kept.
-- The statement is listed twice verbatim in this option.
MERGE INTO customer_purchases AS target
USING (
    SELECT
        customer_id,
        MAX(purchase_date) AS latest_purchase_date
    FROM customer_purchases
    GROUP BY customer_id
) AS source
    ON target.customer_id = source.customer_id
    AND target.purchase_date < source.latest_purchase_date
WHEN MATCHED
    THEN DELETE;

MERGE INTO customer_purchases AS target
USING (
    SELECT
        customer_id,
        MAX(purchase_date) AS latest_purchase_date
    FROM customer_purchases
    GROUP BY customer_id
) AS source
    ON target.customer_id = source.customer_id
    AND target.purchase_date < source.latest_purchase_date
WHEN MATCHED
    THEN DELETE;