Scalar User Defined Functions (UDFs)
To define the properties of a user-defined function, the user can use some of the methods defined in this class.
asNonNullable(): UserDefinedFunction
asNondeterministic(): UserDefinedFunction
Updates UserDefinedFunction to nondeterministic.
-
Updates UserDefinedFunction with a given name.
Find full example code at “examples/src/main/scala/org/apache/spark/examples/sql/UserDefinedScalar.scala” in the Spark repo.
import org.apache.spark.sql.api.java.UDF1;
import org.apache.spark.sql.expressions.UserDefinedFunction;
import static org.apache.spark.sql.functions.udf;
import org.apache.spark.sql.types.DataTypes;
SparkSession spark = SparkSession
.builder()
.appName("Java Spark SQL UDF scalar example")
.getOrCreate();
// Define and register a zero-argument non-deterministic UDF
// UDF is deterministic by default, i.e. produces the same result for the same input.
UserDefinedFunction random = udf(
() -> Math.random(), DataTypes.DoubleType
random.asNondeterministic();
spark.udf().register("random", random);
spark.sql("SELECT random()").show();
// +-------+
// +-------+
// |xxxxxxx|
// +-------+
// Define and register a one-argument UDF
spark.udf().register("plusOne",
(UDF1<Integer, Integer>) x -> x + 1, DataTypes.IntegerType);
spark.sql("SELECT plusOne(5)").show();
// +----------+
// |plusOne(5)|
// +----------+
// | 6|
// +----------+
// Define and register a two-argument UDF
UserDefinedFunction strLen = udf(
(String s, Integer x) -> s.length() + x, DataTypes.IntegerType
spark.udf().register("strLen", strLen);
// +------------+
// |UDF(test, 1)|
// +------------+
// | 5|
// +------------+
// UDF in a WHERE clause
spark.udf().register("oneArgFilter",
(UDF1<Long, Boolean>) x -> x > 5, DataTypes.BooleanType);
spark.range(1, 10).createOrReplaceTempView("test");
spark.sql("SELECT * FROM test WHERE oneArgFilter(id)").show();
// +---+
// | id|
// +---+
// | 6|
// | 7|
// | 8|
// | 9|
// +---+