apache · WenliangCao · Jun 21, 2026 · Jun 21, 2026
diff --git a/scripts/builtin/powerTransform.dml b/scripts/builtin/powerTransform.dml
@@ -0,0 +1,158 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+# Power transformation using the Yeo-Johnson method.
+# Reduces feature skewness by estimating and applying an optimal transformation parameter for each column.
+#
+# INPUT:
+# -------------------------------------------------------------------------------------
+#   X    Input feature matrix of shape n-by-m
+# -------------------------------------------------------------------------------------
+#
+# OUTPUT:
+# -------------------------------------------------------------------------------------
+#   Y    Power-transformed matrix of shape n-by-m
+#   lambdas  Estimated lambda parameters of shape 1-by-m, one per column
+# -------------------------------------------------------------------------------------
+
+m_powerTransform = function(Matrix[Double] X)
+  return (Matrix[Double] Y, Matrix[Double] lambdas)
+{
+  m = ncol(X)
+  lambdas = matrix(1.0, rows=1, cols=m) # Initialize first, then replace each column with the best lambdas
+
+
+# Estimate lambda for each column separately
+  for (j in 1:m){
+    x = X[,j]
+
+    # For constant columns, variance is 0, so skip optimization and use lambda = 1
+    if (max(x) == min(x)) {
+        lambdas[1,j] = 1.0;
+    }
+    else{
+      lambdas[1,j]= ptEstimateLambda(x);
+    }
+}
+# Pass the parameters to Apply
+  Y = powerTransformApply(X, lambdas);
+}
+
+
+
+
+
+ptEstimateLambda = function(Matrix[Double] x)
+    return (Double lambda)
+{
+    lower = -2.0;
+    upper = 2.0;
+
+    lambda = ptGoldenSectionSearch(x, lower, upper);
+}
+
+# Compute negative log likelihood; lower lambda score is better
+
+ptYJNegLogLikelihood = function(
+    Matrix[Double] x,
+    Double lambda)
+  return (Double negLogLikelihood)
+{
+    # powerTransformApply needs lambda as a matrix
+    lambdaMatrix = matrix(lambda, rows=1, cols=1);
+
+    # Apply one YJ transform with this lambda and use the temporary y for scoring
+    y = powerTransformApply(x, lambdaMatrix);
+
+    # Safety check; give a huge penalty score when variance is below 0
+    n = nrow(x);
+    yMean = mean(y);
+    yVariance = sum((y - yMean)^2) / n;
+
+    if (yVariance <= 0.0) {
+        negLogLikelihood = 1e300
+    }
+
+    # Start scoring after the safety check passes
+    # The objective has two parts
+
+    else {
+        # Variance Term
+        logLikelihood = -n / 2.0 * log(yVariance);
+
+        # Jacobian term
+        jacobian = (lambda - 1.0) * sum(sign(x) * log(abs(x) + 1.0));
+
+        # Combine
+        logLikelihood = logLikelihood + jacobian;
+        # Return the negative value
+        negLogLikelihood = -logLikelihood;
+    }
+}
+
+# Temporarily use golden section search for best lambdas; replace with Brent later
+ptGoldenSectionSearch = function(
+    Matrix[Double] x,
+    Double lower,
+    Double upper)
+  return (Double lambdaOptimal)
+{
+  # Golden ratio, tolerance, and max attempts
+  goldenRatio = 0.6180339887498949;
+  tolerance = 1e-6;
+  maxIterations = 100;
+
+  a = lower;
+  b = upper;
+
+  c = b - goldenRatio * (b - a);
+  d = a + goldenRatio * (b - a);
+
+  fc = ptYJNegLogLikelihood(x, c);
+  fd = ptYJNegLogLikelihood(x, d);
+
+  iteration = 0;
+  while (((b - a) > tolerance) & (iteration < maxIterations)) {
+
+    if (fc < fd) {
+      b = d;
+      d = c;
+      fd = fc;
+      c = b - goldenRatio * (b - a);
+      fc = ptYJNegLogLikelihood(x, c);
+    }
+    else {
+      a = c;
+      c = d;
+      fc = fd;
+      d = a + goldenRatio * (b - a);
+      fd = ptYJNegLogLikelihood(x, d);
+    }
+
+    iteration = iteration + 1;
+  }
+
+  # Keep the best one after shrinking
+  if (fc < fd) {
+    lambdaOptimal = c; # Whoever has the lower score wins
+  } else {
+    lambdaOptimal = d;
+  }
+}
diff --git a/scripts/builtin/powerTransformApply.dml b/scripts/builtin/powerTransformApply.dml
@@ -0,0 +1,76 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+# Applies a fitted Yeo-Johnson power transformation.
+# Transforms each feature using its previously estimated lambda parameter.
+#
+# INPUT:
+# -------------------------------------------------------------------------------------
+#   X    Input feature matrix of shape n-by-m
+#   lambdas  Precomputed lambda parameters of shape 1-by-m, one per column
+# -------------------------------------------------------------------------------------
+#
+# OUTPUT:
+# -------------------------------------------------------------------------------------
+#   Y    Power-transformed matrix of shape n-by-m
+# -------------------------------------------------------------------------------------
+
+
+m_powerTransformApply = function(Matrix[Double] X, Matrix[Double] lambdas)
+    return (Matrix[Double] Y)
+{
+    n = nrow(X)
+    m = ncol(X)
+
+    Y = matrix(0.0, rows=n, cols=m)
+
+    # Handle boundary points (0 and 2)
+    eps = 1e-12
+
+    # Loop over columns for transformation
+    for (j in 1:m){
+        x = X[,j]
+        lambda_j = as.scalar(lambdas[1,j])
+        nonnegative = x >= 0
+
+        # Use intermediate inputs to avoid invalid domains in scoring or fractional powers
+        x_pos = ifelse(nonnegative, x, 0.0)
+        x_neg = ifelse(nonnegative, 0.0, x)
+
+        # Transform nonnegative values (x>=0)
+        if (abs(lambda_j) < eps) {
+            y_pos = log(x_pos + 1)
+        }
+        else {
+            y_pos = ((x_pos + 1)^lambda_j - 1) / lambda_j
+        }
+
+        # Transform negative values (x<0)
+        if (abs(lambda_j - 2) < eps) {
+            y_neg = -log(1 - x_neg)
+        }
+        else {
+            y_neg = -((1 - x_neg)^(2 - lambda_j) - 1) / (2 - lambda_j)
+        }
+
+        # Combine the two branches
+        Y[,j] = ifelse(nonnegative, y_pos, y_neg)
+    }
+}
diff --git a/src/main/java/org/apache/sysds/common/Builtins.java b/src/main/java/org/apache/sysds/common/Builtins.java
@@ -275,6 +275,8 @@ public enum Builtins {
 	PCATRANSFORM("pcaTransform", true),
 	PNMF("pnmf", true),
 	PPCA("ppca", true),
+	POWERTRANSFORM("powerTransform", true),
+	POWERTRANSFORMAPPLY("powerTransformApply", true),
 	PPRED("ppred", false),
 	PROD("prod", false),
 	PSNR("psnr", true),

diff --git a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinPowerTransformTest.java b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinPowerTransformTest.java
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.functions.builtin.part2;
+
+import java.util.HashMap;
+
+import org.junit.Test;
+
+import org.apache.sysds.common.Types.ExecMode;
+import org.apache.sysds.common.Types.ExecType;
+import org.apache.sysds.runtime.matrix.data.MatrixValue.CellIndex;
+import org.apache.sysds.test.AutomatedTestBase;
+import org.apache.sysds.test.TestConfiguration;
+import org.apache.sysds.test.TestUtils;
+
+public class BuiltinPowerTransformTest extends AutomatedTestBase {
+	private static final String TEST_NAME = "powerTransformApply";
+	private static final String TEST_DIR = "functions/builtin/";
+	private static final String TEST_CLASS_DIR =
+		TEST_DIR + BuiltinPowerTransformTest.class.getSimpleName() + "/";
+
+	private static final double EPS = 1e-9;
+
+	@Override
+	public void setUp() {
+		// Register test configuration and declare the test output matrix Y
+		addTestConfiguration(
+			TEST_NAME,
+			new TestConfiguration(
+				TEST_CLASS_DIR,
+				TEST_NAME,
+				new String[] {"Y"}
+			)
+		);
+	}
+
+	@Test
+	public void testPowerTransformApplyDenseCP() {
+		// Only test single-node CP execution mode here
+		runPowerTransformApplyTest(ExecType.CP);
+	}
+
+	private void runPowerTransformApplyTest(ExecType execType) {
+		// Save the old execution mode and restore it after the test
+		ExecMode oldExecMode = setExecMode(execType);
+
+		try {
+			// Load the configuration for this test
+			loadTestConfiguration(getTestConfiguration(TEST_NAME));
+
+			String home = SCRIPT_DIR + TEST_DIR;
+
+			// Set the DML test wrapper and R reference implementation
+			fullDMLScriptName = home + TEST_NAME + ".dml";
+			fullRScriptName = home + TEST_NAME + ".R";
+
+			// DML arguments in order:
+			// 1. Input matrix X
+			// 2. Lambda row matrix L
+			// 3. Output matrix Y
+			programArgs = new String[] {
+				"-exec", "singlenode",
+				"-args",
+				input("X"),
+				input("L"),
+				output("Y")
+			};
+
+			// R script receives the input directory and expected output directory
+			rCmd = "Rscript " + fullRScriptName + " "
+				+ inputDir() + " " + expectedDir();
+
+			// Use the same input values in three columns to test lambda 0, 1, and 2
+			double[][] X = {
+				{-2, -2, -2},
+				{-1, -1, -1},
+				{ 0,  0,  0},
+				{ 1,  1,  1},
+				{ 2,  2,  2}
+			};
+
+			// First column lambda=0, second column lambda=1, third column lambda=2
+			double[][] L = {
+				{0, 1, 2}
+			};
+
+			// Write both input matrices to the test input directory and generate metadata
+			writeInputMatrixWithMTD("X", X, true);
+			writeInputMatrixWithMTD("L", L, true);
+
+			// DML test
+			runTest(true, false, null, -1);
+
+			// R reference implementation
+			runRScript(true);
+
+			// Read the results separately
+			HashMap<CellIndex, Double> dmlResult =
+				readDMLMatrixFromOutputDir("Y");
+
+			HashMap<CellIndex, Double> rResult =
+				readRMatrixFromExpectedDir("Y");
+
+			// Compare the two result matrices within the given tolerance
+			TestUtils.compareMatrices(
+				dmlResult,
+				rResult,
+				EPS,
+				"DML",
+				"R"
+			);
+		}
+		catch (Exception exception) {
+			throw new RuntimeException(exception);
+		}
+		finally {
+			// Restore the old execution mode whether the test succeeds or fails
+			resetExecMode(oldExecMode);
+		}
+	}
+}