
Ultimate access to all questions.
A data engineer needs to create a PySpark DataFrame transformation that applies a custom function to a column of integers called sales in order to return the square of each value. Which of the following code blocks correctly defines and applies a Spark UDF to accomplish this task?
A
def square(x):
    return x * x
square_udf = udf(square, IntegerType())
df = df.withColumn('sales_squared', square_udf(df.sales))
B
def square(x):
    return x * x
df = df.withColumn('sales_squared', square(df.sales))
C
def square(x):
    return x * x
df = df.withColumn('sales_squared', udf(square, IntegerType())('sales'))
D
def square(x):
    return x * x
square_udf = udf(square, IntegerType)
df = df.withColumn('sales_squared', square_udf('sales'))
E
def square(x):
    return x ** 2
square_udf = udf(square, IntegerType())
df = df.withColumn('sales_squared', square_udf('sales'))