
Explanation:
To correctly register a table from an external CSV source with pipe-delimited fields and a header row, the Spark SQL DDL should use the USING CSV clause followed by OPTIONS specifying header = "true" and delimiter = "|". The correct syntax is:
CREATE TABLE table_identifier (col_name1 col_type1, …)
USING data_source
OPTIONS (key1 = val1, key2 = val2, …)
LOCATION = path
CREATE TABLE table_identifier (col_name1 col_type1, …)
USING data_source
OPTIONS (key1 = val1, key2 = val2, …)
LOCATION = path
Options are passed with keys as unquoted text and values in quotes. The correct answer demonstrates this syntax, ensuring the table is created with the specified options and location.
Ultimate access to all questions.
No comments yet.
A data engineer needs to register a table named orders from an external CSV source where fields are pipe-delimited (|) and the first row contains the column headers. Which of the following Spark SQL DDL statements correctly creates this table?
A
CREATE TABLE orders_csv(
order_id LONG,
order_customer_id INTEGER,
order_timestamp TIMESTAMP,
total_item_quantity INTEGER,
order_amount DOUBLE,
order_status STRING
)USING CSV
TABLEPROPERTIES (
header = “false“,
delimiter = “|“
)
LOCATION “dbfs:/user/username/retail/orders-csv“
CREATE TABLE orders_csv(
order_id LONG,
order_customer_id INTEGER,
order_timestamp TIMESTAMP,
total_item_quantity INTEGER,
order_amount DOUBLE,
order_status STRING
)USING CSV
TABLEPROPERTIES (
header = “false“,
delimiter = “|“
)
LOCATION “dbfs:/user/username/retail/orders-csv“
B
CREATE TABLE orders_csv(
order_id LONG,
order_customer_id INTEGER,
order_timestamp TIMESTAMP,
total_item_quantity INTEGER,
order_amount DOUBLE,
order_status STRING
)STORED AS CSV
PROPERTIES (
header = “true“,
delimiter = “|“
)
LOCATION “dbfs:/user/username/retail/orders-csv“
CREATE TABLE orders_csv(
order_id LONG,
order_customer_id INTEGER,
order_timestamp TIMESTAMP,
total_item_quantity INTEGER,
order_amount DOUBLE,
order_status STRING
)STORED AS CSV
PROPERTIES (
header = “true“,
delimiter = “|“
)
LOCATION “dbfs:/user/username/retail/orders-csv“
C
CREATE CSV TABLE orders_csv (
order_id LONG,
order_customer_id INTEGER,
order_timestamp TIMESTAMP,
total_item_quantity INTEGER,
order_amount DOUBLE,
order_status STRING
)
OPTIONS (
header = “false“,
delimiter = “|“
)
LOCATION “dbfs:/user/username/retail/orders-csv“
CREATE CSV TABLE orders_csv (
order_id LONG,
order_customer_id INTEGER,
order_timestamp TIMESTAMP,
total_item_quantity INTEGER,
order_amount DOUBLE,
order_status STRING
)
OPTIONS (
header = “false“,
delimiter = “|“
)
LOCATION “dbfs:/user/username/retail/orders-csv“
D
CREATE TABLE orders_csv USING CSV(
order_id LONG,
order_customer_id INTEGER,
order_timestamp TIMESTAMP,
total_item_quantity INTEGER,
order_amount DOUBLE,
order_status STRING
)
OPTIONS (
header = “true“,
delimiter = “|“
)
LOCATION “dbfs:/user/username/retail/orders-csv“
CREATE TABLE orders_csv USING CSV(
order_id LONG,
order_customer_id INTEGER,
order_timestamp TIMESTAMP,
total_item_quantity INTEGER,
order_amount DOUBLE,
order_status STRING
)
OPTIONS (
header = “true“,
delimiter = “|“
)
LOCATION “dbfs:/user/username/retail/orders-csv“
E
CREATE TABLE orders_csv(
order_id LONG,
order_customer_id INTEGER,
order_timestamp TIMESTAMP,
total_item_quantity INTEGER,
order_amount DOUBLE,
order_status STRING
)USING CSV
OPTIONS (
header = “true“,
delimiter = “|“
)
LOCATION “dbfs:/user/username/retail/orders-csv“
CREATE TABLE orders_csv(
order_id LONG,
order_customer_id INTEGER,
order_timestamp TIMESTAMP,
total_item_quantity INTEGER,
order_amount DOUBLE,
order_status STRING
)USING CSV
OPTIONS (
header = “true“,
delimiter = “|“
)
LOCATION “dbfs:/user/username/retail/orders-csv“