Changeset 18009 for branches/3087_Ceres_Integration/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/Evaluators/SymbolicRegressionConstantOptimizationEvaluator.cs
 Timestamp:
 07/15/21 12:07:27 (5 months ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

branches/3087_Ceres_Integration/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/Evaluators/SymbolicRegressionConstantOptimizationEvaluator.cs
r18006 r18009 23 23 using System.Collections.Generic; 24 24 using System.Linq; 25 25 26 using HEAL.Attic; 27 26 28 using HeuristicLab.Common; 27 29 using HeuristicLab.Core; 28 30 using HeuristicLab.Data; 29 31 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; 32 using HeuristicLab.NativeInterpreter; 30 33 using HeuristicLab.Optimization; 31 34 using HeuristicLab.Parameters; … … 209 212 bool updateConstantsInTree = true, Action<double[], double, object> iterationCallback = null, EvaluationsCounter counter = null) { 210 213 211 // Numeric constants in the tree become variables for parameter optimization. 212 // Variables in the tree become parameters (fixed values) for parameter optimization. 213 // For each parameter (variable in the original tree) we store the 214 // variable name, variable value (for factor vars) and lag as a DataForVariable object. 215 // A dictionary is used to find parameters 216 double[] initialConstants; 217 var parameters = new List<TreeToAutoDiffTermConverter.DataForVariable>(); 218 219 TreeToAutoDiffTermConverter.ParametricFunction func; 220 TreeToAutoDiffTermConverter.ParametricFunctionGradient func_grad; 221 if (!TreeToAutoDiffTermConverter.TryConvertToAutoDiff(tree, updateVariableWeights, applyLinearScaling, out parameters, out initialConstants, out func, out func_grad)) 222 throw new NotSupportedException("Could not optimize constants of symbolic expression tree due to not supported symbols used in the tree."); 223 if (parameters.Count == 0) return 0.0; // constant expressions always have a R² of 0.0 224 var parameterEntries = parameters.ToArray(); // order of entries must be the same for x 225 226 // extract inital constants 227 double[] c; 228 if (applyLinearScaling) { 229 c = new double[initialConstants.Length + 2]; 230 c[0] = 0.0; 231 c[1] = 1.0; 232 Array.Copy(initialConstants, 0, c, 2, initialConstants.Length); 233 } else { 234 c = (double[])initialConstants.Clone(); 235 } 236 237 214 double originalQuality = 
SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling); 238 215 239 if (counter == null) counter = new EvaluationsCounter(); 240 var rowEvaluationsCounter = new EvaluationsCounter(); 241 242 alglib.lsfitstate state; 243 alglib.lsfitreport rep; 244 int retVal; 245 246 IDataset ds = problemData.Dataset; 247 double[,] x = new double[rows.Count(), parameters.Count]; 248 int row = 0; 249 foreach (var r in rows) { 250 int col = 0; 251 foreach (var info in parameterEntries) { 252 if (ds.VariableHasType<double>(info.variableName)) { 253 x[row, col] = ds.GetDoubleValue(info.variableName, r + info.lag); 254 } else if (ds.VariableHasType<string>(info.variableName)) { 255 x[row, col] = ds.GetStringValue(info.variableName, r) == info.variableValue ? 1 : 0; 256 } else throw new InvalidProgramException("found a variable of unknown type"); 257 col++; 258 } 259 row++; 260 } 261 double[] y = ds.GetDoubleValues(problemData.TargetVariable, rows).ToArray(); 262 int n = x.GetLength(0); 263 int m = x.GetLength(1); 264 int k = c.Length; 265 266 alglib.ndimensional_pfunc function_cx_1_func = CreatePFunc(func); 267 alglib.ndimensional_pgrad function_cx_1_grad = CreatePGrad(func_grad); 268 alglib.ndimensional_rep xrep = (p, f, obj) => iterationCallback(p, f, obj); 269 270 try { 271 alglib.lsfitcreatefg(x, y, c, n, m, k, false, out state); 272 alglib.lsfitsetcond(state, 0.0, maxIterations); 273 alglib.lsfitsetxrep(state, iterationCallback != null); 274 alglib.lsfitfit(state, function_cx_1_func, function_cx_1_grad, xrep, rowEvaluationsCounter); 275 alglib.lsfitresults(state, out retVal, out c, out rep); 276 } catch (ArithmeticException) { 277 return originalQuality; 278 } catch (alglib.alglibexception) { 279 return originalQuality; 280 } 281 282 counter.FunctionEvaluations += rowEvaluationsCounter.FunctionEvaluations / n; 283 counter.GradientEvaluations += 
rowEvaluationsCounter.GradientEvaluations / n; 284 285 //retVal == 7 => constant optimization failed due to wrong gradient 286 // 8 => optimizer detected NAN / INF in the target 287 // function and/ or gradient 288 if (retVal != 7 && retVal != 8) { 289 if (applyLinearScaling) { 290 var tmp = new double[c.Length - 2]; 291 Array.Copy(c, 2, tmp, 0, tmp.Length); 292 UpdateConstants(tree, tmp, updateVariableWeights); 293 } else UpdateConstants(tree, c, updateVariableWeights); 294 } 216 var nodesToOptimize = new HashSet<ISymbolicExpressionTreeNode>(); 217 var originalNodeValues = new Dictionary<ISymbolicExpressionTreeNode, double>(); 218 219 foreach (var node in tree.IterateNodesPrefix().OfType<SymbolicExpressionTreeTerminalNode>()) { 220 if (node is VariableTreeNode && !updateVariableWeights) { 221 continue; 222 } 223 if (node is ConstantTreeNode && node.Parent.Symbol is Power && node.Parent.GetSubtree(0) == node) { 224 // do not optimize exponents 225 continue; 226 } 227 nodesToOptimize.Add(node); 228 if (node is ConstantTreeNode constant) { 229 originalNodeValues[node] = constant.Value; 230 } else if (node is VariableTreeNode variable) { 231 originalNodeValues[node] = variable.Weight; 232 } 233 } 234 235 var options = new SolverOptions { 236 Iterations = maxIterations 237 }; 238 var summary = new OptimizationSummary(); 239 var optimizedNodeValues = ParameterOptimizer.OptimizeTree(tree, problemData.Dataset, problemData.TrainingIndices, problemData.TargetVariable, nodesToOptimize, options, ref summary); 240 241 counter.FunctionEvaluations += summary.ResidualEvaluations; 242 counter.GradientEvaluations += summary.JacobianEvaluations; 243 244 // check if the fitting of the parameters was successful 245 UpdateNodeValues(optimizedNodeValues); 246 295 247 var quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling); 296 297 if (!updateConstantsInTree) 
UpdateConstants(tree, initialConstants, updateVariableWeights); 298 299 if (originalQuality - quality > 0.001 || double.IsNaN(quality)) { 300 UpdateConstants(tree, initialConstants, updateVariableWeights); 301 return originalQuality; 302 } 303 return quality; 304 } 305 306 private static void UpdateConstants(ISymbolicExpressionTree tree, double[] constants, bool updateVariableWeights) { 307 int i = 0; 308 foreach (var node in tree.Root.IterateNodesPrefix().OfType<SymbolicExpressionTreeTerminalNode>()) { 309 ConstantTreeNode constantTreeNode = node as ConstantTreeNode; 310 VariableTreeNodeBase variableTreeNodeBase = node as VariableTreeNodeBase; 311 FactorVariableTreeNode factorVarTreeNode = node as FactorVariableTreeNode; 312 if (constantTreeNode != null) { 313 if (constantTreeNode.Parent.Symbol is Power 314 && constantTreeNode.Parent.GetSubtree(1) == constantTreeNode) continue; // exponents in powers are not optimizated (see TreeToAutoDiffTermConverter) 315 constantTreeNode.Value = constants[i++]; 316 } else if (updateVariableWeights && variableTreeNodeBase != null) 317 variableTreeNodeBase.Weight = constants[i++]; 318 else if (factorVarTreeNode != null) { 319 for (int j = 0; j < factorVarTreeNode.Weights.Length; j++) 320 factorVarTreeNode.Weights[j] = constants[i++]; 321 } 322 } 323 } 324 325 private static alglib.ndimensional_pfunc CreatePFunc(TreeToAutoDiffTermConverter.ParametricFunction func) { 326 return (double[] c, double[] x, ref double fx, object o) => { 327 fx = func(c, x); 328 var counter = (EvaluationsCounter)o; 329 counter.FunctionEvaluations++; 330 }; 331 } 332 333 private static alglib.ndimensional_pgrad CreatePGrad(TreeToAutoDiffTermConverter.ParametricFunctionGradient func_grad) { 334 return (double[] c, double[] x, ref double fx, double[] grad, object o) => { 335 var tuple = func_grad(c, x); 336 fx = tuple.Item2; 337 Array.Copy(tuple.Item1, grad, grad.Length); 338 var counter = (EvaluationsCounter)o; 339 counter.GradientEvaluations++; 340 }; 341 } 
248 if (quality < originalQuality || !updateConstantsInTree) { 249 UpdateNodeValues(originalNodeValues); 250 } 251 return Math.Max(quality, originalQuality); 252 } 253 254 private static void UpdateNodeValues(IDictionary<ISymbolicExpressionTreeNode, double> values) { 255 foreach (var item in values) { 256 var node = item.Key; 257 if (node is ConstantTreeNode constant) { 258 constant.Value = item.Value; 259 } else if (node is VariableTreeNode variable) { 260 variable.Weight = item.Value; 261 } 262 } 263 } 264 342 265 public static bool CanOptimizeConstants(ISymbolicExpressionTree tree) { 343 266 return TreeToAutoDiffTermConverter.IsCompatible(tree);
Note: See TracChangeset
for help on using the changeset viewer.