diff --git a/encoders/src/main/java/au/csiro/pathling/encoders/RowIndexCounter.java b/encoders/src/main/java/au/csiro/pathling/encoders/RowIndexCounter.java new file mode 100644 index 0000000000..99af311768 --- /dev/null +++ b/encoders/src/main/java/au/csiro/pathling/encoders/RowIndexCounter.java @@ -0,0 +1,82 @@ +/* + * Copyright © 2018-2026 Commonwealth Scientific and Industrial Research + * Organisation (CSIRO) ABN 41 687 119 230. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package au.csiro.pathling.encoders; + +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.Serializable; + +/** + * A thread-safe counter for tracking element positions within recursive tree traversals. Each + * thread gets its own independent counter via {@link ThreadLocal}, ensuring that Spark tasks + * running in parallel on different partitions do not interfere with each other. + * + *

This class is shared across partitions via Spark's {@code addReferenceObj()} mechanism in + * codegen mode. Since reference objects are shared within an executor, {@link ThreadLocal} is + * required to isolate mutable state per task thread. + * + *

This class is {@link Serializable} so that it survives Spark plan serialization to executors. + * The {@link ThreadLocal} is eagerly initialized and re-initialized after deserialization via + * {@link #readObject(ObjectInputStream)}. + * + *

Note: {@link ThreadLocal#remove()} is intentionally not called. The stored value is a single + * {@code int[1]} (16 bytes) that is reset to zero each row via {@link #reset()}. When this object + * becomes unreachable, the {@link ThreadLocal}'s weak-reference key is collected and the stale + * entry is cleaned up lazily by subsequent {@link ThreadLocal} operations on the same thread. + * + * @author Piotr Szul + */ +@SuppressWarnings("java:S5164") // ThreadLocal.remove() not needed — see class Javadoc. +public class RowIndexCounter implements Serializable { + + private static final long serialVersionUID = 1L; + + private transient ThreadLocal counter = ThreadLocal.withInitial(() -> new int[] {0}); + + private void readObject(final ObjectInputStream in) throws IOException, ClassNotFoundException { + in.defaultReadObject(); + counter = ThreadLocal.withInitial(() -> new int[] {0}); + } + + /** + * Returns the current counter value without modifying it. Multiple calls between increments + * return the same value, making this safe to use when the counter is referenced more than once + * per element. + * + * @return the current counter value + */ + public int get() { + return counter.get()[0]; + } + + /** + * Increments the counter without returning a value. This is used to advance the counter after all + * references to the current value have been evaluated. + */ + public void increment() { + counter.get()[0]++; + } + + /** + * Resets the counter to zero for the current thread. This should be called before evaluating each + * top-level row to ensure the index sequence starts fresh. + */ + public void reset() { + counter.get()[0] = 0; + } +} diff --git a/encoders/src/main/java/au/csiro/pathling/encoders/ValueFunctions.java b/encoders/src/main/java/au/csiro/pathling/encoders/ValueFunctions.java index 619fc75183..82d7e2c5e0 100644 --- a/encoders/src/main/java/au/csiro/pathling/encoders/ValueFunctions.java +++ b/encoders/src/main/java/au/csiro/pathling/encoders/ValueFunctions.java @@ -429,4 +429,50 @@ public static Column variantUnwrap( public static Column pruneAnnotations(@Nonnull final Column col) { return column(new PruneSyntheticFields(expression(col))); } + + /** + * Creates a read-only view of a shared {@link RowIndexCounter}. Each evaluation returns the + * current counter value without incrementing it, so multiple references within the same element + * evaluation all see the same value. + * + *

The counter must be advanced separately via {@link #rowCounterIncrement(Column, + * RowIndexCounter)} after all references for a given element have been evaluated. + * + * @param state the shared counter instance + * @return a Column that reads the current counter value without incrementing + */ + @Nonnull + public static Column rowCounterGet(@Nonnull final RowIndexCounter state) { + return column(new RowCounterGet(state)); + } + + /** + * Wraps a column expression so that the shared row counter is incremented after evaluating the + * expression. This should be applied to the extractor result in a repeat projection to ensure the + * counter advances exactly once per element. + * + * @param child the expression to evaluate before incrementing + * @param state the shared counter instance to increment + * @return a Column that evaluates the child and then increments the counter + */ + @Nonnull + public static Column rowCounterIncrement( + @Nonnull final Column child, @Nonnull final RowIndexCounter state) { + return column(new RowCounterIncrement(expression(child), state)); + } + + /** + * Wraps a column expression so that the shared row counter is reset to zero before evaluating the + * expression. This should be applied at the outermost level of a repeat projection to ensure the + * counter starts fresh for each resource row. + * + * @param child the expression to evaluate after resetting + * @param state the shared counter instance to reset + * @return a Column that resets the counter and then evaluates the child + */ + @Nonnull + public static Column resetCounter( + @Nonnull final Column child, @Nonnull final RowIndexCounter state) { + return column(new ResetCounter(expression(child), state)); + } } diff --git a/encoders/src/main/scala/au/csiro/pathling/encoders/Expressions.scala b/encoders/src/main/scala/au/csiro/pathling/encoders/Expressions.scala index d38a9bc3d2..64b278f70b 100644 --- a/encoders/src/main/scala/au/csiro/pathling/encoders/Expressions.scala +++ b/encoders/src/main/scala/au/csiro/pathling/encoders/Expressions.scala @@ -947,4 +947,124 @@ case class UnresolvedVariantUnwrap(inner: Expression, schemaRef: Expression, override def toString: String = s"VariantUnwrap($inner)" } +/** + * A leaf expression that reads the current value of a [[RowIndexCounter]] without incrementing it. + * Multiple references to this expression within the same element evaluation all return the same + * value, making it safe for use when `%rowIndex` is referenced more than once. + * + * The counter must be advanced separately via [[RowCounterIncrement]] after all references for a + * given element have been evaluated. + * + * @param state the shared thread-safe counter + */ +case class RowCounterGet(state: RowIndexCounter) + extends LeafExpression with Nondeterministic { + + override def stateful: Boolean = true + + override def nullable: Boolean = false + + override def dataType: DataType = IntegerType + + override protected def initializeInternal(partitionIndex: Int): Unit = { + // No-op: reset is handled by ResetCounter at the per-row level, not per-partition. + } + + override protected def evalInternal(input: InternalRow): Int = { + state.get() + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val counterRef = ctx.addReferenceObj("rowCounter", state, classOf[RowIndexCounter].getName) + ev.copy(code = code""" + final ${CodeGenerator.javaType(dataType)} ${ev.value} = $counterRef.get();""", + isNull = FalseLiteral) + } + + override def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression = { + RowCounterGet(state) + } +} + +/** + * A unary expression that increments a [[RowIndexCounter]] after evaluating its child expression. + * This is used to advance the counter exactly once per element, after all `%rowIndex` references + * (via [[RowCounterGet]]) have been read. + * + * @param child the expression to evaluate before incrementing + * @param state the shared thread-safe counter to increment + */ +case class RowCounterIncrement(child: Expression, state: RowIndexCounter) + extends UnaryExpression with NonSQLExpression { + + override def dataType: DataType = child.dataType + + override def nullable: Boolean = child.nullable + + override protected def nullSafeEval(input: Any): Any = { + // This should not be called — we override eval directly. + throw new UnsupportedOperationException(ExpressionConstants.CODEGEN_ONLY_MSG) + } + + override def eval(input: InternalRow): Any = { + val result = child.eval(input) + state.increment() + result + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val counterRef = ctx.addReferenceObj("rowCounter", state, classOf[RowIndexCounter].getName) + val childEval = child.genCode(ctx) + ev.copy(code = code""" + ${childEval.code} + final boolean ${ev.isNull} = ${childEval.isNull}; + final ${CodeGenerator.javaType(dataType)} ${ev.value} = ${childEval.value}; + $counterRef.increment();""") + } + + override protected def withNewChildInternal(newChild: Expression): Expression = { + RowCounterIncrement(newChild, state) + } +} + +/** + * A unary expression that resets a [[RowIndexCounter]]'s shared state to zero before evaluating its + * child expression. This ensures the counter starts fresh for each row when used inside + * per-row array transformations. + * + * @param child the expression to evaluate after resetting + * @param state the shared thread-safe counter to reset + */ +case class ResetCounter(child: Expression, state: RowIndexCounter) + extends UnaryExpression with NonSQLExpression { + + override def dataType: DataType = child.dataType + + override def nullable: Boolean = child.nullable + + override protected def nullSafeEval(input: Any): Any = { + // This should not be called — we override eval directly. + throw new UnsupportedOperationException(ExpressionConstants.CODEGEN_ONLY_MSG) + } + + override def eval(input: InternalRow): Any = { + state.reset() + child.eval(input) + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val counterRef = ctx.addReferenceObj("rowCounter", state, classOf[RowIndexCounter].getName) + val childEval = child.genCode(ctx) + ev.copy(code = code""" + $counterRef.reset(); + ${childEval.code} + final boolean ${ev.isNull} = ${childEval.isNull}; + final ${CodeGenerator.javaType(dataType)} ${ev.value} = ${childEval.value};""") + } + + override protected def withNewChildInternal(newChild: Expression): Expression = { + ResetCounter(newChild, state) + } +} + // ColumnFunctions has been moved to a Java class to access package-private Spark methods diff --git a/encoders/src/test/java/au/csiro/pathling/encoders/ExpressionsBothModesTest.java b/encoders/src/test/java/au/csiro/pathling/encoders/ExpressionsBothModesTest.java index 5f011236d9..7e6f2bd083 100644 --- a/encoders/src/test/java/au/csiro/pathling/encoders/ExpressionsBothModesTest.java +++ b/encoders/src/test/java/au/csiro/pathling/encoders/ExpressionsBothModesTest.java @@ -292,4 +292,69 @@ void testStructProductInlineWithUnsafeRowData() { assertEquals(expected.get(i), actual.get(i), "Row " + i + " mismatch"); } } + + /** + * Tests that {@link ValueFunctions#rowCounterGet}, {@link ValueFunctions#rowCounterIncrement}, + * and {@link ValueFunctions#resetCounter} work together to assign sequential indices within an + * array transform and reset between rows. + */ + @Test + void testRowCounterExpressions() { + // Create a dataset with two rows, each containing an array of structs. + final StructType itemType = + DataTypes.createStructType( + new StructField[] { + new StructField("value", DataTypes.StringType, true, Metadata.empty()) + }); + final StructType schema = + DataTypes.createStructType( + new StructField[] { + new StructField("id", DataTypes.StringType, true, Metadata.empty()), + new StructField("items", DataTypes.createArrayType(itemType), true, Metadata.empty()) + }); + + final List data = + Arrays.asList( + RowFactory.create( + "r1", + Arrays.asList( + RowFactory.create("a"), RowFactory.create("b"), RowFactory.create("c"))), + RowFactory.create("r2", Arrays.asList(RowFactory.create("x"), RowFactory.create("y")))); + + final Dataset ds = spark.createDataFrame(data, schema).repartition(1); + + // Build a transform that assigns a row index to each array element using the counter + // expressions. + final RowIndexCounter counter = new RowIndexCounter(); + final Column indexCol = ValueFunctions.rowCounterGet(counter); + + // Transform each item: struct(value, index), then increment the counter. + final Column transformed = + functions.transform( + col("items"), + item -> + ValueFunctions.rowCounterIncrement( + struct(item.getField("value").alias("value"), indexCol.alias("idx")), counter)); + + // Wrap with resetCounter so the index restarts at zero for each row. + final Column withReset = ValueFunctions.resetCounter(transformed, counter); + + final Dataset result = ds.select(col("id"), withReset.alias("indexed_items")); + final List rows = result.collectAsList(); + + assertEquals(2, rows.size()); + + // Row 1: three items with indices 0, 1, 2. + final List items1 = rows.get(0).getList(1); + assertEquals(3, items1.size()); + assertEquals(0, items1.get(0).getInt(1)); + assertEquals(1, items1.get(1).getInt(1)); + assertEquals(2, items1.get(2).getInt(1)); + + // Row 2: two items with indices 0, 1 (counter was reset). + final List items2 = rows.get(1).getList(1); + assertEquals(2, items2.size()); + assertEquals(0, items2.get(0).getInt(1)); + assertEquals(1, items2.get(1).getInt(1)); + } } diff --git a/encoders/src/test/java/au/csiro/pathling/encoders/RowIndexCounterTest.java b/encoders/src/test/java/au/csiro/pathling/encoders/RowIndexCounterTest.java new file mode 100644 index 0000000000..c114357269 --- /dev/null +++ b/encoders/src/test/java/au/csiro/pathling/encoders/RowIndexCounterTest.java @@ -0,0 +1,101 @@ +/* + * Copyright © 2018-2026 Commonwealth Scientific and Industrial Research + * Organisation (CSIRO) ABN 41 687 119 230. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package au.csiro.pathling.encoders; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import org.junit.jupiter.api.Test; + +/** Tests for {@link RowIndexCounter}. */ +class RowIndexCounterTest { + + @Test + void getReturnsZeroInitially() { + final RowIndexCounter counter = new RowIndexCounter(); + assertEquals(0, counter.get()); + } + + @Test + void incrementAdvancesCounter() { + final RowIndexCounter counter = new RowIndexCounter(); + counter.increment(); + assertEquals(1, counter.get()); + counter.increment(); + assertEquals(2, counter.get()); + } + + @Test + void getIsIdempotentBetweenIncrements() { + final RowIndexCounter counter = new RowIndexCounter(); + counter.increment(); + assertEquals(1, counter.get()); + assertEquals(1, counter.get()); + } + + @Test + void resetSetsCounterToZero() { + final RowIndexCounter counter = new RowIndexCounter(); + counter.increment(); + counter.increment(); + assertEquals(2, counter.get()); + counter.reset(); + assertEquals(0, counter.get()); + } + + @Test + void threadLocalIsolation() throws Exception { + final RowIndexCounter counter = new RowIndexCounter(); + counter.increment(); + counter.increment(); + + // A different thread should see its own independent counter starting at zero. + final int[] otherThreadValue = new int[1]; + final Thread thread = new Thread(() -> otherThreadValue[0] = counter.get()); + thread.start(); + thread.join(); + + assertEquals(0, otherThreadValue[0]); + assertEquals(2, counter.get()); + } + + @Test + void serializationRestoresCounter() throws Exception { + final RowIndexCounter counter = new RowIndexCounter(); + counter.increment(); + + // Serialize and deserialize. + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + try (final ObjectOutputStream oos = new ObjectOutputStream(bos)) { + oos.writeObject(counter); + } + final ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); + final RowIndexCounter deserialized; + try (final ObjectInputStream ois = new ObjectInputStream(bis)) { + deserialized = (RowIndexCounter) ois.readObject(); + } + + // Deserialized counter should start fresh at zero. + assertEquals(0, deserialized.get()); + deserialized.increment(); + assertEquals(1, deserialized.get()); + } +} diff --git a/fhirpath/src/main/java/au/csiro/pathling/fhirpath/column/ColumnRepresentation.java b/fhirpath/src/main/java/au/csiro/pathling/fhirpath/column/ColumnRepresentation.java index 266981adcc..203e53b397 100644 --- a/fhirpath/src/main/java/au/csiro/pathling/fhirpath/column/ColumnRepresentation.java +++ b/fhirpath/src/main/java/au/csiro/pathling/fhirpath/column/ColumnRepresentation.java @@ -33,6 +33,7 @@ import jakarta.annotation.Nonnull; import jakarta.annotation.Nullable; import java.util.Optional; +import java.util.function.BiFunction; import java.util.function.BinaryOperator; import java.util.function.UnaryOperator; import java.util.stream.Stream; @@ -375,6 +376,21 @@ public ColumnRepresentation transform(final UnaryOperator lambda) { c -> functions.transform(c, lambda::apply), c -> when(c.isNotNull(), lambda.apply(c))); } + /** + * Transforms the current {@link ColumnRepresentation} using a lambda that receives both the + * element and its 0-based index within the array. + * + * @param lambda the function to apply to each element and its index + * @return a new {@link ColumnRepresentation} that is transformed + */ + @Nonnull + public ColumnRepresentation transformWithIndex( + @Nonnull final BiFunction lambda) { + return vectorize( + c -> functions.transform(c, lambda::apply), + c -> when(c.isNotNull(), lambda.apply(c, lit(0)))); + } + /** * Aggregates the current {@link ColumnRepresentation} using a zero value and an aggregator * function. diff --git a/fhirpath/src/main/java/au/csiro/pathling/fhirpath/evaluation/SingleResourceEvaluator.java b/fhirpath/src/main/java/au/csiro/pathling/fhirpath/evaluation/SingleResourceEvaluator.java index 0092a6dd8c..4968fcce47 100644 --- a/fhirpath/src/main/java/au/csiro/pathling/fhirpath/evaluation/SingleResourceEvaluator.java +++ b/fhirpath/src/main/java/au/csiro/pathling/fhirpath/evaluation/SingleResourceEvaluator.java @@ -26,6 +26,7 @@ import au.csiro.pathling.fhirpath.variable.EnvironmentVariableResolver; import au.csiro.pathling.fhirpath.variable.VariableResolverChain; import jakarta.annotation.Nonnull; +import java.util.HashMap; import java.util.Map; import lombok.AccessLevel; import lombok.AllArgsConstructor; @@ -91,6 +92,22 @@ public static SingleResourceEvaluator of( /** The FHIRPath evaluation configuration. */ @Nonnull private final FhirpathConfiguration configuration; + /** + * Creates a new SingleResourceEvaluator with an additional variable added to the variable map. + * + * @param name the variable name + * @param value the variable value as a Collection + * @return a new SingleResourceEvaluator with the additional variable + */ + @Nonnull + public SingleResourceEvaluator withVariable( + @Nonnull final String name, @Nonnull final Collection value) { + final Map newVariables = new HashMap<>(variables); + newVariables.put(name, value); + return new SingleResourceEvaluator( + resourceResolver, functionRegistry, newVariables, configuration); + } + /** * Evaluates a FHIRPath expression with the default input context. * diff --git a/fhirpath/src/main/java/au/csiro/pathling/projection/ProjectionContext.java b/fhirpath/src/main/java/au/csiro/pathling/projection/ProjectionContext.java index 3c30c2775c..98106ad999 100644 --- a/fhirpath/src/main/java/au/csiro/pathling/projection/ProjectionContext.java +++ b/fhirpath/src/main/java/au/csiro/pathling/projection/ProjectionContext.java @@ -17,9 +17,12 @@ package au.csiro.pathling.projection; +import static org.apache.spark.sql.functions.lit; + import au.csiro.pathling.fhirpath.FhirPath; import au.csiro.pathling.fhirpath.collection.Collection; import au.csiro.pathling.fhirpath.collection.EmptyCollection; +import au.csiro.pathling.fhirpath.collection.IntegerCollection; import au.csiro.pathling.fhirpath.column.DefaultRepresentation; import au.csiro.pathling.fhirpath.evaluation.SingleResourceEvaluator; import jakarta.annotation.Nonnull; @@ -30,14 +33,29 @@ * Dependencies and logic relating to the traversal of FHIRPath expressions. * *

This context holds an evaluator for FHIRPath expressions and the current input context for - * expression evaluation. + * expression evaluation. It also carries the current row index for use within forEach/forEachOrNull + * iterations. * * @param evaluator an evaluator for FHIRPath expressions (produces Column expressions) * @param inputContext the initial context for evaluation + * @param rowIndex the current 0-based element index within a forEach/forEachOrNull iteration * @author Piotr Szul */ public record ProjectionContext( - @Nonnull SingleResourceEvaluator evaluator, @Nonnull Collection inputContext) { + @Nonnull SingleResourceEvaluator evaluator, + @Nonnull Collection inputContext, + @Nonnull Column rowIndex) { + + /** + * Creates a new ProjectionContext with the default row index of 0. + * + * @param evaluator an evaluator for FHIRPath expressions + * @param inputContext the initial context for evaluation + */ + public ProjectionContext( + @Nonnull final SingleResourceEvaluator evaluator, @Nonnull final Collection inputContext) { + this(evaluator, inputContext, lit(0)); + } /** * Creates a new ProjectionContext with a different input context. @@ -47,7 +65,18 @@ public record ProjectionContext( */ @Nonnull public ProjectionContext withInputContext(@Nonnull final Collection inputContext) { - return new ProjectionContext(evaluator, inputContext); + return new ProjectionContext(evaluator, inputContext, rowIndex); + } + + /** + * Creates a new ProjectionContext with a different row index. + * + * @param rowIndex the new row index column + * @return a new ProjectionContext with the specified row index + */ + @Nonnull + public ProjectionContext withRowIndex(@Nonnull final Column rowIndex) { + return new ProjectionContext(evaluator, inputContext, rowIndex); } /** @@ -94,15 +123,23 @@ public ProjectionContext withEmptyInput() { return withInputContext(EmptyCollection.getInstance()); } + /** The name of the row index environment variable. */ + public static final String ROW_INDEX_VARIABLE = "rowIndex"; + /** * Evaluates the given FHIRPath path and returns the result as a collection. * + *

The evaluation includes the current row index as the {@code %rowIndex} environment variable. + * * @param path the path to evaluate * @return the result as a collection */ @Nonnull public Collection evalExpression(@Nonnull final FhirPath path) { - return evaluator.evaluate(path, inputContext); + return evaluator + .withVariable( + ROW_INDEX_VARIABLE, IntegerCollection.build(new DefaultRepresentation(rowIndex))) + .evaluate(path, inputContext); } /** diff --git a/fhirpath/src/main/java/au/csiro/pathling/projection/RepeatSelection.java b/fhirpath/src/main/java/au/csiro/pathling/projection/RepeatSelection.java index 2d0b1879a9..c5f503396c 100644 --- a/fhirpath/src/main/java/au/csiro/pathling/projection/RepeatSelection.java +++ b/fhirpath/src/main/java/au/csiro/pathling/projection/RepeatSelection.java @@ -19,12 +19,14 @@ import static org.apache.spark.sql.functions.concat; +import au.csiro.pathling.encoders.RowIndexCounter; import au.csiro.pathling.encoders.ValueFunctions; import au.csiro.pathling.fhirpath.FhirPath; import au.csiro.pathling.fhirpath.collection.Collection; import au.csiro.pathling.fhirpath.column.DefaultRepresentation; import jakarta.annotation.Nonnull; import java.util.List; +import java.util.function.UnaryOperator; import java.util.stream.Collectors; import org.apache.spark.sql.Column; import org.hl7.fhir.r4.model.Enumerations.FHIRDefinedType; @@ -55,6 +57,13 @@ public record RepeatSelection( @Override public ProjectionResult evaluate(@Nonnull final ProjectionContext context) { + // Create a shared counter for the %rowIndex environment variable. The counter is split into + // read and increment operations: all %rowIndex references within a single element read the + // same value (via rowCounterGet), and the counter advances exactly once per element (via + // rowCounterIncrement wrapping the extractor result). + final RowIndexCounter rowIndexCounter = new RowIndexCounter(); + final Column rowIndexCol = ValueFunctions.rowCounterGet(rowIndexCounter); + // Evaluate each path to get collections, retaining them for type inspection. final List pathCollections = paths.stream().map(context::evalExpression).toList(); @@ -66,11 +75,13 @@ public ProjectionResult evaluate(@Nonnull final ProjectionContext context) { .anyMatch( c -> c.getFhirType().map(t -> !FHIRDefinedType.EXTENSION.equals(t)).orElse(true)); - // Create the list of non-empty starting contexts from the evaluated path collections. + // Create the list of non-empty starting contexts from the evaluated path collections. The row + // index counter is injected so that %rowIndex resolves to the global element position. final List startingNodes = pathCollections.stream() .filter(Collection::isNotEmpty) .map(context::withInputContext) + .map(ctx -> ctx.withRowIndex(rowIndexCol)) .toList(); // Map starting nodes to transformTree expressions and concatenate the results. @@ -82,15 +93,18 @@ public ProjectionResult evaluate(@Nonnull final ProjectionContext context) { ctx.inputContext().getColumnValue(), c -> ValueFunctions.emptyArrayIfMissingField( - component.evaluateElementWise(ctx.withInputColumn(c))), + evaluateElementWiseWithIncrement( + ctx.withInputColumn(c), rowIndexCounter)), paths.stream().map(ctx::asColumnOperator).toList(), maxDepth, errorOnDepthExhaustion)) .toArray(Column[]::new); + // Wrap the concatenated result with a counter reset so that the %rowIndex sequence restarts at + // zero for each resource row. final Column result = nodeResults.length > 0 - ? concat(nodeResults) + ? ValueFunctions.resetCounter(concat(nodeResults), rowIndexCounter) : DefaultRepresentation.empty() .plural() .transform(component.asColumnOperator(context.withEmptyInput())) @@ -104,6 +118,25 @@ public ProjectionResult evaluate(@Nonnull final ProjectionContext context) { return component.evaluate(schemaContext).withResultColumn(result); } + /** + * Evaluates the component clause element-wise, wrapping each per-element result with a counter + * increment. This ensures the shared row index counter advances exactly once per array element, + * after all {@code %rowIndex} references within that element have been read. + * + * @param context the projection context for evaluation + * @param counter the shared counter to increment after each element + * @return the resulting column after element-wise evaluation with per-element increment + */ + @Nonnull + private Column evaluateElementWiseWithIncrement( + @Nonnull final ProjectionContext context, @Nonnull final RowIndexCounter counter) { + final UnaryOperator elementOperator = component.asColumnOperator(context); + return new DefaultRepresentation(context.inputContext().getColumnValue()) + .transform(c -> ValueFunctions.rowCounterIncrement(elementOperator.apply(c), counter)) + .flatten() + .getValue(); + } + /** * Returns the FHIRPath expression representation of this repeat selection. * diff --git a/fhirpath/src/main/java/au/csiro/pathling/projection/UnnestingSelection.java b/fhirpath/src/main/java/au/csiro/pathling/projection/UnnestingSelection.java index f82d8a12b2..a403c39cfa 100644 --- a/fhirpath/src/main/java/au/csiro/pathling/projection/UnnestingSelection.java +++ b/fhirpath/src/main/java/au/csiro/pathling/projection/UnnestingSelection.java @@ -19,6 +19,7 @@ import au.csiro.pathling.fhirpath.FhirPath; import au.csiro.pathling.fhirpath.collection.Collection; +import au.csiro.pathling.fhirpath.column.DefaultRepresentation; import jakarta.annotation.Nonnull; import org.apache.spark.sql.Column; @@ -30,6 +31,9 @@ * clause to each element of that collection. The results are flattened into a single array. When * multiple projections are needed, wrap them in a {@link GroupingSelection} first. * + *

The {@code %rowIndex} environment variable is set to the 0-based index of each element during + * iteration. Each nesting level maintains its own independent {@code %rowIndex} value. + * * @param path the FHIRPath expression that identifies the collection to unnest * @param component the projection clause to apply to each element (use GroupingSelection for * multiple) @@ -48,7 +52,18 @@ public ProjectionResult evaluate(@Nonnull final ProjectionContext context) { // Evaluate the path to get the collection that will serve as the basis for unnesting. final Collection unnestingCollection = context.evalExpression(path); final ProjectionContext unnestingContext = context.withInputContext(unnestingCollection); - final Column columnResult = component.evaluateElementWise(unnestingContext); + + // Use the indexed transform to track the element index as %rowIndex. + final Column columnResult = + new DefaultRepresentation(unnestingContext.inputContext().getColumnValue()) + .transformWithIndex( + (element, index) -> + component + .evaluate(unnestingContext.withInputColumn(element).withRowIndex(index)) + .getResultColumn()) + .flatten() + .getValue(); + return component .evaluate(unnestingContext.asStubContext()) .withResultColumn(columnResult) diff --git a/fhirpath/src/test/java/au/csiro/pathling/views/FhirViewTest.java b/fhirpath/src/test/java/au/csiro/pathling/views/FhirViewTest.java index 006c68db5c..83dfed03cd 100644 --- a/fhirpath/src/test/java/au/csiro/pathling/views/FhirViewTest.java +++ b/fhirpath/src/test/java/au/csiro/pathling/views/FhirViewTest.java @@ -33,6 +33,7 @@ import static org.junit.jupiter.api.Assumptions.assumeFalse; import static scala.jdk.javaapi.CollectionConverters.asScala; +import au.csiro.pathling.config.QueryConfiguration; import au.csiro.pathling.encoders.FhirEncoders; import au.csiro.pathling.encoders.datatypes.DecimalCustomCoder; import au.csiro.pathling.io.source.DataSource; @@ -515,9 +516,18 @@ void test(@Nonnull final TestParameters parameters) { throw e; } - // Create a new executor and build the query. + // Create a new executor with a reduced traversal depth (4 instead of the default + // 10) to keep Spark plan complexity manageable. Nested repeat-in-repeat tests + // compound the plan depth, and the default of 10 exceeds Spark's analyzer iteration + // limit (100). A depth of 4 is sufficient for the current test data, which nests + // extensions at most 4 levels deep. If future tests require deeper traversal, + // increase + // this value but be aware of the Spark analyzer limit. final FhirViewExecutor executor = - new FhirViewExecutor(fhirContext, parameters.sourceData()); + new FhirViewExecutor( + fhirContext, + parameters.sourceData(), + QueryConfiguration.builder().maxUnboundTraversalDepth(4).build()); return executor.buildQuery(view); }); } diff --git a/fhirpath/src/test/resources/viewTests/rowindex.json b/fhirpath/src/test/resources/viewTests/rowindex.json new file mode 100644 index 0000000000..0c036bb429 --- /dev/null +++ b/fhirpath/src/test/resources/viewTests/rowindex.json @@ -0,0 +1,656 @@ +{ + "title": "%rowIndex tests", + "resources": [ + { + "resourceType": "Patient", + "id": "pt1", + "name": [ + { + "family": "Smith", + "given": ["John", "James"] + }, + { + "family": "Jones", + "given": ["Jane"] + } + ], + "contact": [ + { + "telecom": [ + { "system": "phone", "value": "555-0001" }, + { "system": "email", "value": "a@b.com" } + ] + }, + { + "telecom": [{ "system": "phone", "value": "555-0002" }] + } + ] + }, + { + "resourceType": "Patient", + "id": "pt2", + "name": [ + { + "family": "Brown", + "given": ["Bob"] + } + ] + }, + { + "resourceType": "Patient", + "id": "pt3", + "gender": "male" + }, + { + "resourceType": "Patient", + "id": "pt4", + "extension": [ + { + "url": "urn:ext1", + "extension": [ + { + "url": "urn:ext2", + "extension": [ + { + "url": "urn:ext3", + "extension": [ + { + "url": "urn:ext4", + "valueString": "leaf" + } + ] + } + ] + } + ] + } + ] + }, + { + "resourceType": "Patient", + "id": "pt5", + "extension": [ + { + "url": "urn:branch-root", + "extension": [ + { + "url": "urn:branch-child1", + "extension": [ + { + "url": "urn:branch-grandchild", + "valueString": "deep" + } + ] + }, + { + "url": "urn:branch-child2", + "valueString": "shallow" + } + ] + } + ] + }, + { + "resourceType": "Patient", + "id": "pt6", + "extension": [ + { + "url": "urn:fe1", + "extension": [ + { + "url": "urn:fe1.1", + "valueString": "v1" + } + ] + }, + { + "url": "urn:fe2", + "extension": [ + { + "url": "urn:fe2.1", + "valueString": "v2" + } + ] + } + ] + } + ], + "tests": [ + { + "title": "spec example - capturing element position", + "view": { + "resource": "Patient", + "select": [ + { + "column": [{ "name": "id", "path": "id" }] + }, + { + "forEach": "name", + "column": [ + { "name": "name_index", "path": "%rowIndex" }, + { "name": "family", "path": "family" } + ] + } + ] + }, + "expect": [ + { "id": "pt1", "name_index": 0, "family": "Smith" }, + { "id": "pt1", "name_index": 1, "family": "Jones" }, + { "id": "pt2", "name_index": 0, "family": "Brown" } + ] + }, + { + "title": "spec example - nested iteration with independent indices", + "view": { + "resource": "Patient", + "select": [ + { + "column": [{ "name": "id", "path": "id" }] + }, + { + "forEach": "contact", + "column": [{ "name": "contact_index", "path": "%rowIndex" }], + "select": [ + { + "forEach": "telecom", + "column": [ + { "name": "telecom_index", "path": "%rowIndex" }, + { "name": "system", "path": "system" } + ] + } + ] + } + ] + }, + "expect": [ + { + "id": "pt1", + "contact_index": 0, + "telecom_index": 0, + "system": "phone" + }, + { + "id": "pt1", + "contact_index": 0, + "telecom_index": 1, + "system": "email" + }, + { + "id": "pt1", + "contact_index": 1, + "telecom_index": 0, + "system": "phone" + } + ] + }, + { + "title": "spec example - row index with unionAll", + "view": { + "resource": "Patient", + "select": [ + { + "column": [{ "name": "id", "path": "id" }] + }, + { + "forEach": "name", + "column": [{ "name": "name_index", "path": "%rowIndex" }], + "select": [ + { + "unionAll": [ + { + "forEach": "given", + "column": [ + { "name": "given_index", "path": "%rowIndex" }, + { "name": "value", "path": "$this" } + ] + }, + { + "column": [ + { "name": "given_index", "path": "%rowIndex" }, + { "name": "value", "path": "family" } + ] + } + ] + } + ] + } + ] + }, + "expect": [ + { "id": "pt1", "name_index": 0, "given_index": 0, "value": "John" }, + { "id": "pt1", "name_index": 0, "given_index": 1, "value": "James" }, + { "id": "pt1", "name_index": 0, "given_index": 0, "value": "Smith" }, + { "id": "pt1", "name_index": 1, "given_index": 0, "value": "Jane" }, + { "id": "pt1", "name_index": 1, "given_index": 1, "value": "Jones" }, + { "id": "pt2", "name_index": 0, "given_index": 0, "value": "Bob" }, + { "id": "pt2", "name_index": 0, "given_index": 0, "value": "Brown" } + ] + }, + { + "title": "top-level %rowIndex defaults to 0", + "view": { + "resource": "Patient", + "where": [{ "path": "id = 'pt1' or id = 'pt2' or id = 'pt3'" }], + "select": [ + { + "column": [ + { "name": "id", "path": "id" }, + { "name": "row_index", "path": "%rowIndex" } + ] + } + ] + }, + "expect": [ + { "id": "pt1", "row_index": 0 }, + { "id": "pt2", "row_index": 0 }, + { "id": "pt3", "row_index": 0 } + ] + }, + { + "title": "forEach with %rowIndex", + "view": { + "resource": "Patient", + "select": [ + { + "column": [{ "name": "id", "path": "id" }], + "select": [ + { + "forEach": "name", + "column": [ + { "name": "name_index", "path": "%rowIndex" }, + { "name": "family", "path": "family" } + ] + } + ] + } + ] + }, + "expect": [ + { "id": "pt1", "name_index": 0, "family": "Smith" }, + { "id": "pt1", "name_index": 1, "family": "Jones" }, + { "id": "pt2", "name_index": 0, "family": "Brown" } + ] + }, + { + "title": "nested forEach with independent %rowIndex values", + "view": { + "resource": "Patient", + "select": [ + { + "column": [{ "name": "id", "path": "id" }], + "select": [ + { + "forEach": "name", + "column": [{ "name": "name_index", "path": "%rowIndex" }], + "select": [ + { + "forEach": "given", + "column": [ + { "name": "given_index", "path": "%rowIndex" }, + { "name": "given", "path": "$this" } + ] + } + ] + } + ] + } + ] + }, + "expect": [ + { "id": "pt1", "name_index": 0, "given_index": 0, "given": "John" }, + { "id": "pt1", "name_index": 0, "given_index": 1, "given": "James" }, + { "id": "pt1", "name_index": 1, "given_index": 0, "given": "Jane" }, + { "id": "pt2", "name_index": 0, "given_index": 0, "given": "Bob" } + ] + }, + { + "title": "forEachOrNull with %rowIndex", + "view": { + "resource": "Patient", + "where": [{ "path": "id = 'pt1' or id = 'pt2' or id = 'pt3'" }], + "select": [ + { + "column": [{ "name": "id", "path": "id" }], + "select": [ + { + "forEachOrNull": "name", + "column": [ + { "name": "family", "path": "family" }, + { "name": "name_index", "path": "%rowIndex" } + ] + } + ] + } + ] + }, + "expect": [ + { "id": "pt1", "family": "Smith", "name_index": 0 }, + { "id": "pt1", "family": "Jones", "name_index": 1 }, + { "id": "pt2", "family": "Brown", "name_index": 0 }, + { "id": "pt3", "family": null, "name_index": null } + ] + }, + { + "title": "%rowIndex used in arithmetic expression", + "view": { + "resource": "Patient", + "select": [ + { + "column": [{ "name": "id", "path": "id" }], + "select": [ + { + "forEach": "name", + "column": [ + { "name": "family", "path": "family" }, + { "name": "one_based_index", "path": "%rowIndex + 1" } + ] + } + ] + } + ] + }, + "expect": [ + { "id": "pt1", "family": "Smith", "one_based_index": 1 }, + { "id": "pt1", "family": "Jones", "one_based_index": 2 }, + { "id": "pt2", "family": "Brown", "one_based_index": 1 } + ] + }, + { + "title": "repeat with %rowIndex — linear chain", + "view": { + "resource": "Patient", + "where": [{ "path": "id = 'pt4'" }], + "select": [ + { + "column": [{ "name": "id", "path": "id" }] + }, + { + "repeat": ["extension"], + "column": [ + { "name": "row_index", "path": "%rowIndex" }, + { "name": "url", "path": "url", "type": "uri" } + ] + } + ] + }, + "expect": [ + { "id": "pt4", "row_index": 0, "url": "urn:ext1" }, + { "id": "pt4", "row_index": 1, "url": "urn:ext2" }, + { "id": "pt4", "row_index": 2, "url": "urn:ext3" }, + { "id": "pt4", "row_index": 3, "url": "urn:ext4" } + ] + }, + { + "title": "repeat with %rowIndex arithmetic", + "view": { + "resource": "Patient", + "where": [{ "path": "id = 'pt4'" }], + "select": [ + { + "column": [{ "name": "id", "path": "id" }] + }, + { + "repeat": ["extension"], + "column": [ + { "name": "one_based", "path": "%rowIndex + 1" }, + { "name": "url", "path": "url", "type": "uri" } + ] + } + ] + }, + "expect": [ + { "id": "pt4", "one_based": 1, "url": "urn:ext1" }, + { "id": "pt4", "one_based": 2, "url": "urn:ext2" }, + { "id": "pt4", "one_based": 3, "url": "urn:ext3" }, + { "id": "pt4", "one_based": 4, "url": "urn:ext4" } + ] + }, + { + "title": "repeat with %rowIndex and nested forEach", + "view": { + "resource": "Patient", + "where": [{ "path": "id = 'pt4'" }], + "select": [ + { + "column": [{ "name": "id", "path": "id" }] + }, + { + "repeat": ["extension"], + "column": [ + { "name": "repeat_index", "path": "%rowIndex" }, + { "name": "parent_url", "path": "url", "type": "uri" } + ], + "select": [ + { + "forEach": "extension", + "column": [ + { "name": "foreach_index", "path": "%rowIndex" }, + { "name": "child_url", "path": "url", "type": "uri" } + ] + } + ] + } + ] + }, + "expect": [ + { + "id": "pt4", + "repeat_index": 0, + "parent_url": "urn:ext1", + "foreach_index": 0, + "child_url": "urn:ext2" + }, + { + "id": "pt4", + "repeat_index": 1, + "parent_url": "urn:ext2", + "foreach_index": 0, + "child_url": "urn:ext3" + }, + { + "id": "pt4", + "repeat_index": 2, + "parent_url": "urn:ext3", + "foreach_index": 0, + "child_url": "urn:ext4" + } + ] + }, + { + "title": "repeat with %rowIndex — branching tree breadth-first", + "view": { + "resource": "Patient", + "where": [{ "path": "id = 'pt5'" }], + "select": [ + { + "column": [{ "name": "id", "path": "id" }] + }, + { + "repeat": ["extension"], + "column": [ + { "name": "row_index", "path": "%rowIndex" }, + { "name": "url", "path": "url", "type": "uri" } + ] + } + ] + }, + "expect": [ + { "id": "pt5", "row_index": 0, "url": "urn:branch-root" }, + { "id": "pt5", "row_index": 1, "url": "urn:branch-child1" }, + { "id": "pt5", "row_index": 2, "url": "urn:branch-child2" }, + { "id": "pt5", "row_index": 3, "url": "urn:branch-grandchild" } + ] + }, + { + "title": "repeat with %rowIndex — counter resets per resource", + "view": { + "resource": "Patient", + "where": [{ "path": "id = 'pt4' or id = 'pt5'" }], + "select": [ + { + "column": [{ "name": "id", "path": "id" }] + }, + { + "repeat": ["extension"], + "column": [ + { "name": "row_index", "path": "%rowIndex" }, + { "name": "url", "path": "url", "type": "uri" } + ] + } + ] + }, + "expect": [ + { "id": "pt4", "row_index": 0, "url": "urn:ext1" }, + { "id": "pt4", "row_index": 1, "url": "urn:ext2" }, + { "id": "pt4", "row_index": 2, "url": "urn:ext3" }, + { "id": "pt4", "row_index": 3, "url": "urn:ext4" }, + { "id": "pt5", "row_index": 0, "url": "urn:branch-root" }, + { "id": "pt5", "row_index": 1, "url": "urn:branch-child1" }, + { "id": "pt5", "row_index": 2, "url": "urn:branch-child2" }, + { "id": "pt5", "row_index": 3, "url": "urn:branch-grandchild" } + ] + }, + { + "title": "repeat nested inside repeat — independent %rowIndex", + "view": { + "resource": "Patient", + "where": [{ "path": "id = 'pt4'" }], + "select": [ + { + "column": [{ "name": "id", "path": "id" }] + }, + { + "repeat": ["extension"], + "column": [ + { "name": "outer_index", "path": "%rowIndex" }, + { "name": "url", "path": "url", "type": "uri" } + ], + "select": [ + { + "repeat": ["extension"], + "column": [ + { "name": "inner_index", "path": "%rowIndex" }, + { "name": "inner_url", "path": "url", "type": "uri" } + ] + } + ] + } + ] + }, + "expect": [ + { + "id": "pt4", + "outer_index": 0, + "url": "urn:ext1", + "inner_index": 0, + "inner_url": "urn:ext2" + }, + { + "id": "pt4", + "outer_index": 0, + "url": "urn:ext1", + "inner_index": 1, + "inner_url": "urn:ext3" + }, + { + "id": "pt4", + "outer_index": 0, + "url": "urn:ext1", + "inner_index": 2, + "inner_url": "urn:ext4" + }, + { + "id": "pt4", + "outer_index": 1, + "url": "urn:ext2", + "inner_index": 0, + "inner_url": "urn:ext3" + }, + { + "id": "pt4", + "outer_index": 1, + "url": "urn:ext2", + "inner_index": 1, + "inner_url": "urn:ext4" + }, + { + "id": "pt4", + "outer_index": 2, + "url": "urn:ext3", + "inner_index": 0, + "inner_url": "urn:ext4" + } + ] + }, + { + "title": "repeat with duplicate %rowIndex references — same value per element", + "view": { + "resource": "Patient", + "where": [{ "path": "id = 'pt4'" }], + "select": [ + { + "column": [{ "name": "id", "path": "id" }] + }, + { + "repeat": ["extension"], + "column": [ + { "name": "idx_a", "path": "%rowIndex" }, + { "name": "idx_b", "path": "%rowIndex" }, + { "name": "url", "path": "url", "type": "uri" } + ] + } + ] + }, + "expect": [ + { "id": "pt4", "idx_a": 0, "idx_b": 0, "url": "urn:ext1" }, + { "id": "pt4", "idx_a": 1, "idx_b": 1, "url": "urn:ext2" }, + { "id": "pt4", "idx_a": 2, "idx_b": 2, "url": "urn:ext3" }, + { "id": "pt4", "idx_a": 3, "idx_b": 3, "url": "urn:ext4" } + ] + }, + { + "title": "repeat nested inside forEach — independent %rowIndex", + "view": { + "resource": "Patient", + "where": [{ "path": "id = 'pt6'" }], + "select": [ + { + "column": [{ "name": "id", "path": "id" }] + }, + { + "forEach": "extension", + "column": [ + { "name": "foreach_index", "path": "%rowIndex" }, + { "name": "parent_url", "path": "url", "type": "uri" } + ], + "select": [ + { + "repeat": ["extension"], + "column": [ + { "name": "repeat_index", "path": "%rowIndex" }, + { "name": "inner_url", "path": "url", "type": "uri" } + ] + } + ] + } + ] + }, + "expect": [ + { + "id": "pt6", + "foreach_index": 0, + "parent_url": "urn:fe1", + "repeat_index": 0, + "inner_url": "urn:fe1.1" + }, + { + "id": "pt6", + "foreach_index": 1, + "parent_url": "urn:fe2", + "repeat_index": 0, + "inner_url": "urn:fe2.1" + } + ] + } + ] +} diff --git a/openspec/changes/archive/2026-03-18-row-index-env-variable/.openspec.yaml b/openspec/changes/archive/2026-03-18-row-index-env-variable/.openspec.yaml new file mode 100644 index 0000000000..3c861dd5b6 --- /dev/null +++ b/openspec/changes/archive/2026-03-18-row-index-env-variable/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-03-18 diff --git a/openspec/changes/archive/2026-03-18-row-index-env-variable/design.md b/openspec/changes/archive/2026-03-18-row-index-env-variable/design.md new file mode 100644 index 0000000000..af2b6b12b4 --- /dev/null +++ b/openspec/changes/archive/2026-03-18-row-index-env-variable/design.md @@ -0,0 +1,74 @@ +## Context + +Pathling's ViewDefinition processing uses `UnnestingSelection` to implement `forEach`/`forEachOrNull`. This evaluates a FHIRPath expression to get an array-valued collection, then applies a projection to each element using Spark's higher-order `transform(array, element -> ...)` function via `ColumnRepresentation.transform()` and `ProjectionClause.evaluateElementWise()`. + +FHIRPath environment variables are resolved through a `VariableResolverChain` — a chain-of-responsibility pattern where resolvers (`BuiltInConstantResolver`, `ContextVariableResolver`, `SuppliedVariableResolver`, etc.) are queried in sequence. Variables resolve to `Collection` objects containing Spark `Column` expressions. User-supplied variables (e.g. ViewDefinition constants) are passed as `Map` through the `SingleResourceEvaluator`. + +The `%rowIndex` variable is different from existing environment variables because its value changes per-element during iteration, rather than being constant across the entire evaluation. + +## Goals / Non-Goals + +**Goals:** + +- Provide `%rowIndex` as a 0-based integer environment variable within `forEach` and `forEachOrNull` iterations. +- Default to `0` at the top level when no iteration is active. +- Support independent `%rowIndex` values at each nesting level. +- Make `%rowIndex` available to all FHIRPath expressions within the iteration scope. + +**Non-Goals:** + +- Supporting `%rowIndex` within `repeat` iterations (separate future work). +- Changes to the FHIRPath parser or grammar (environment variables are already parsed via the `%name` syntax). + +## Decisions + +### Use Spark's indexed transform for per-element index tracking + +**Decision:** Use `functions.transform(array, (element, index) -> ...)` — Spark's two-argument lambda variant of the `transform` higher-order function — to propagate the element index during unnesting. + +**Rationale:** The current `evaluateElementWise` method uses `ColumnRepresentation.transform()` which calls `functions.transform(array, element -> ...)`. Spark provides a built-in overload that passes both the element and its 0-based index to the lambda. This aligns directly with the `%rowIndex` semantics and avoids generating indices externally. + +**Alternatives considered:** + +- _`posexplode` + rejoin_: Would explode arrays with position indices then rejoin. Rejected because it requires dataset-level operations (adding/removing rows), which conflicts with the current column-expression-based architecture that works within Spark's higher-order functions. +- _`zip_with_index` preprocessing_: Would pre-wrap each array element with its index before transformation. Rejected as unnecessary complexity when Spark's `transform` already provides index natively. + +### Inject %rowIndex via the existing supplied variables mechanism + +**Decision:** Pass `%rowIndex` as a supplied variable in the `Map` that flows through `SingleResourceEvaluator`. The `UnnestingSelection` will create a new evaluator (or updated variable map) for each unnesting level that includes the current index column as the `rowIndex` variable. + +**Rationale:** The existing `SuppliedVariableResolver` and `VariableResolverChain` infrastructure already supports arbitrary named variables passed as `Collection` objects. Using this mechanism avoids creating a new resolver type and keeps `%rowIndex` consistent with how ViewDefinition constants are handled. + +**Alternative considered:** + +- _Dedicated `RowIndexResolver`_: A new `EnvironmentVariableResolver` implementation specific to `%rowIndex`. Rejected as over-engineering — the supplied variables mechanism handles this cleanly and requires no changes to the resolver chain infrastructure. + +### Thread index through ProjectionContext + +**Decision:** Extend `ProjectionContext` to carry the current `%rowIndex` column, and update `UnnestingSelection` to pass the index from Spark's `transform` lambda into the projection context. The context will merge this index into the evaluator's variable map when creating the per-element evaluation context. + +**Rationale:** `ProjectionContext` is the natural place to carry per-iteration state since it already carries the `inputContext` and `evaluator`. Adding the row index here keeps the change localised to the projection layer and avoids threading index state through unrelated components. + +**Implementation approach:** + +1. Add a `rowIndex` field (type `Column`, defaulting to `lit(0)`) to `ProjectionContext`. +2. Modify `evaluateElementWise` in `UnnestingSelection` (or introduce a new method) to use the indexed `transform` variant, capturing the index column. +3. When building the per-element `ProjectionContext`, include the index column as a `rowIndex` supplied variable via the evaluator's variable map. +4. Nesting is handled naturally: each `UnnestingSelection` creates a new context with its own `%rowIndex`, shadowing the outer value. + +### Use IntegerCollection for the %rowIndex type + +**Decision:** Represent `%rowIndex` as an `IntegerCollection` wrapping a Spark integer column. + +**Rationale:** `IntegerCollection` is the standard FHIRPath integer representation. It supports arithmetic (`%rowIndex + 1`) and comparisons (`%rowIndex = 0`) out of the box. The index from Spark's `transform` is already an integer column, so no type conversion is needed. + +## Risks / Trade-offs + +**[Risk] Evaluator immutability** — `SingleResourceEvaluator` stores variables as a `Map` set at construction time. Injecting a per-element `%rowIndex` requires either creating a new evaluator per unnesting level or making the variable map mutable. +→ **Mitigation:** Create a new `SingleResourceEvaluator` (or a lightweight wrapper) per `UnnestingSelection` that includes `rowIndex` in its variable map. This preserves immutability and isolates each nesting level. + +**[Risk] Performance impact of creating per-level evaluators** — Creating new evaluator instances per unnesting level could add overhead. +→ **Mitigation:** The evaluators are lightweight objects (no dataset or SparkSession state). The per-level cost is negligible compared to the Spark query execution itself. Additionally, this already happens implicitly via `ProjectionContext.withInputContext()`. + +**[Risk] forEachOrNull with empty collection should produce %rowIndex = 0** — When `forEachOrNull` produces a null row for an empty collection, the index must still resolve to `0`. +→ **Mitigation:** The `orNull` mechanism in `ProjectionResult` handles the empty-collection case. The default `%rowIndex` value of `0` in the projection context will naturally apply since no transform iteration occurs for empty collections. diff --git a/openspec/changes/archive/2026-03-18-row-index-env-variable/proposal.md b/openspec/changes/archive/2026-03-18-row-index-env-variable/proposal.md new file mode 100644 index 0000000000..cc9263a52e --- /dev/null +++ b/openspec/changes/archive/2026-03-18-row-index-env-variable/proposal.md @@ -0,0 +1,29 @@ +## Why + +The [SQL on FHIR ViewDefinition spec](https://build.fhir.org/ig/FHIR/sql-on-fhir-v2/StructureDefinition-ViewDefinition.html#rowindex) defines a `%rowIndex` environment variable that provides the 0-based index of the current element within the collection being iterated by `forEach` or `forEachOrNull`. Pathling's ViewDefinition support does not yet implement this variable, preventing users from preserving element ordering, disambiguating repeating elements, and constructing surrogate keys in flattened output. + +## What Changes + +- Add a new `%rowIndex` environment variable to the FHIRPath evaluation context. +- `%rowIndex` resolves to the 0-based index of the current element within the collection being iterated by `forEach` or `forEachOrNull`. +- At the top level (no iteration), `%rowIndex` evaluates to `0`. +- Each nesting level of `forEach`/`forEachOrNull` maintains an independent `%rowIndex` value. +- Support for `%rowIndex` within `repeat` is out of scope for this change. +- The variable is available to all FHIRPath expressions evaluated within the iteration scope, including nested `select` clauses. + +## Capabilities + +### New Capabilities + +- `row-index-variable`: Support for the `%rowIndex` environment variable within ViewDefinition `forEach` and `forEachOrNull` iterations, providing a 0-based element index. + +### Modified Capabilities + +_(none)_ + +## Impact + +- **fhirpath module**: Environment variable resolution chain needs a new resolver or mechanism to supply `%rowIndex` values that change per-element during iteration. +- **projection module**: `UnnestingSelection` (forEach/forEachOrNull) needs to track the current element index and inject it into the evaluation context. +- **views module**: `FhirViewExecutor` may need minor changes to initialise `%rowIndex` at the top level (value `0`). +- **Public API**: No breaking changes. `%rowIndex` is a new environment variable that was previously unsupported; existing ViewDefinitions and FHIRPath expressions are unaffected. diff --git a/openspec/changes/archive/2026-03-18-row-index-env-variable/specs/row-index-variable/spec.md b/openspec/changes/archive/2026-03-18-row-index-env-variable/specs/row-index-variable/spec.md new file mode 100644 index 0000000000..02bf2f460e --- /dev/null +++ b/openspec/changes/archive/2026-03-18-row-index-env-variable/specs/row-index-variable/spec.md @@ -0,0 +1,80 @@ +## ADDED Requirements + +### Requirement: %rowIndex resolves to element index within forEach + +When a ViewDefinition `select` clause uses `forEach`, the `%rowIndex` environment variable SHALL resolve to the 0-based index of the current element within the collection produced by the `forEach` expression. The index reflects the element's position in the collection as evaluated by the FHIRPath expression, starting at 0 for the first element. + +#### Scenario: Single forEach with multiple elements + +- **WHEN** a ViewDefinition has `forEach: "Patient.name"` and the Patient has 3 names +- **THEN** `%rowIndex` SHALL be `0` for the first name, `1` for the second, and `2` for the third + +#### Scenario: forEach with single element + +- **WHEN** a ViewDefinition has `forEach: "Patient.name"` and the Patient has 1 name +- **THEN** `%rowIndex` SHALL be `0` for that name + +#### Scenario: forEach with empty collection + +- **WHEN** a ViewDefinition has `forEach: "Patient.name"` and the Patient has no names +- **THEN** no rows are produced (forEach produces no output for empty collections), so `%rowIndex` is not evaluated + +### Requirement: %rowIndex resolves to element index within forEachOrNull + +When a ViewDefinition `select` clause uses `forEachOrNull`, the `%rowIndex` environment variable SHALL resolve to the 0-based index of the current element within the collection produced by the `forEachOrNull` expression, following the same indexing rules as `forEach`. + +#### Scenario: forEachOrNull with multiple elements + +- **WHEN** a ViewDefinition has `forEachOrNull: "Patient.name"` and the Patient has 2 names +- **THEN** `%rowIndex` SHALL be `0` for the first name and `1` for the second + +#### Scenario: forEachOrNull with empty collection + +- **WHEN** a ViewDefinition has `forEachOrNull: "Patient.name"` and the Patient has no names +- **THEN** one row is produced with null values for all nested columns including `%rowIndex` + +### Requirement: %rowIndex defaults to 0 at top level + +When no `forEach` or `forEachOrNull` iteration is active (i.e. the expression is evaluated at the top level of a ViewDefinition select), `%rowIndex` SHALL evaluate to `0`. + +#### Scenario: Top-level column referencing %rowIndex + +- **WHEN** a ViewDefinition `select` has a column with expression `%rowIndex` and no `forEach` or `forEachOrNull` is active +- **THEN** the column value SHALL be `0` for every resource row + +### Requirement: Nested iterations maintain independent %rowIndex values + +Each nesting level of `forEach`/`forEachOrNull` SHALL maintain its own independent `%rowIndex`. An inner `forEach` resets `%rowIndex` to count within its own collection, and restoring the outer `%rowIndex` when the inner iteration completes. + +#### Scenario: Nested forEach iterations + +- **WHEN** a ViewDefinition has an outer `forEach: "Patient.name"` (Patient has 2 names) and an inner `forEach: "HumanName.given"` (first name has 2 givens, second name has 1 given) +- **THEN** for the first name: outer `%rowIndex` is `0`, inner `%rowIndex` is `0` and `1` for each given; for the second name: outer `%rowIndex` is `1`, inner `%rowIndex` is `0` for its single given + +#### Scenario: Inner forEach does not affect outer %rowIndex + +- **WHEN** a column expression references `%rowIndex` at the outer forEach level after an inner forEach has completed +- **THEN** the value SHALL reflect the outer iteration index, unaffected by the inner iteration + +### Requirement: %rowIndex is available in nested select expressions + +The `%rowIndex` variable SHALL be accessible from any FHIRPath expression evaluated within the scope of the current iteration, including columns within nested `select` clauses that do not themselves introduce a new `forEach`/`forEachOrNull`. + +#### Scenario: Column in nested select without its own forEach + +- **WHEN** a `forEach` iterates over `Patient.name` and a nested `select` (without its own `forEach`) contains a column with expression `%rowIndex` +- **THEN** the column SHALL resolve to the index from the enclosing `forEach` + +### Requirement: %rowIndex is an integer type + +The `%rowIndex` variable SHALL resolve to an integer value compatible with FHIRPath integer type, allowing arithmetic operations and comparisons. + +#### Scenario: Arithmetic with %rowIndex + +- **WHEN** a column expression is `%rowIndex + 1` +- **THEN** the result SHALL be the 1-based index of the current element + +#### Scenario: Comparison with %rowIndex + +- **WHEN** a `where` clause filters with `%rowIndex = 0` +- **THEN** only the first element of the iterated collection SHALL be included diff --git a/openspec/changes/archive/2026-03-18-row-index-env-variable/tasks.md b/openspec/changes/archive/2026-03-18-row-index-env-variable/tasks.md new file mode 100644 index 0000000000..09892ac72b --- /dev/null +++ b/openspec/changes/archive/2026-03-18-row-index-env-variable/tasks.md @@ -0,0 +1,29 @@ +## 1. Extend ProjectionContext with row index + +- [x] 1.1 Add a `rowIndex` field (type `Column`) to `ProjectionContext`, defaulting to `lit(0)` +- [x] 1.2 Add a `withRowIndex(Column)` method to create a new context with a different row index +- [x] 1.3 Update `ProjectionContext` to inject `rowIndex` as a `%rowIndex` supplied variable into the evaluator's variable map when evaluating expressions + +## 2. Add indexed transform support + +- [x] 2.1 Add an indexed `transform` method to `ColumnRepresentation` that uses Spark's `transform(array, (element, index) -> ...)` variant, returning both the transformed column and making the index available to the caller +- [x] 2.2 Add an `evaluateElementWiseWithIndex` method (or modify the existing flow) in `UnnestingSelection` that uses the indexed transform and passes the index column into the projection context via `withRowIndex` + +## 3. Wire up UnnestingSelection + +- [x] 3.1 Modify `UnnestingSelection.evaluate()` to use the indexed transform, creating a per-element `ProjectionContext` that carries the current index as `%rowIndex` +- [x] 3.2 Ensure nested `UnnestingSelection` levels shadow the outer `%rowIndex` with their own index value + +## 4. Handle forEachOrNull empty collection case + +- [x] 4.1 Verify that when `forEachOrNull` produces a null row for an empty collection, `%rowIndex` resolves to `0` (the default from `ProjectionContext`) + +## 5. Tests + +- [x] 5.1 Write a ViewDefinition integration test: `forEach` with `%rowIndex` column producing correct 0-based indices +- [x] 5.2 Write a ViewDefinition integration test: `forEachOrNull` with non-empty collection producing correct indices +- [x] 5.3 Write a ViewDefinition integration test: `forEachOrNull` with empty collection producing null `%rowIndex` +- [x] 5.4 Write a ViewDefinition integration test: top-level `%rowIndex` (no forEach) resolves to `0` +- [x] 5.5 Write a ViewDefinition integration test: nested `forEach` with independent `%rowIndex` values at each level +- [x] 5.6 Write a ViewDefinition integration test: `%rowIndex` used in arithmetic expression (`%rowIndex + 1`) +- [x] 5.7 Verify existing ViewDefinition tests still pass (no regressions) diff --git a/openspec/changes/archive/2026-04-01-repeat-row-index/.openspec.yaml b/openspec/changes/archive/2026-04-01-repeat-row-index/.openspec.yaml new file mode 100644 index 0000000000..0f5280395b --- /dev/null +++ b/openspec/changes/archive/2026-04-01-repeat-row-index/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-04-01 diff --git a/openspec/changes/archive/2026-04-01-repeat-row-index/design.md b/openspec/changes/archive/2026-04-01-repeat-row-index/design.md new file mode 100644 index 0000000000..277f5d27d0 --- /dev/null +++ b/openspec/changes/archive/2026-04-01-repeat-row-index/design.md @@ -0,0 +1,61 @@ +## Context + +The `%rowIndex` environment variable is already implemented for `forEach`/`forEachOrNull` via `UnnestingSelection`. That implementation uses Spark's two-argument `transform(array, (element, index) -> ...)` lambda, which provides the element index natively. The index column is threaded through `ProjectionContext.withRowIndex()` and injected into expression evaluation via `SingleResourceEvaluator.withVariable()`. + +`RepeatSelection` works differently. It uses `ValueFunctions.transformTree()` to recursively flatten a tree structure (e.g., nested extensions) by concatenating results across depth levels and traversal branches. The `transformTree` function internally uses `Concat` to merge arrays from each depth level. There is no built-in Spark mechanism to track a global position index across this flattened concatenation. + +## Goals / Non-Goals + +**Goals:** + +- Provide `%rowIndex` as a 0-based integer within `repeat` iterations, reflecting the element's position in the flattened traversal-order output. +- Reset the counter to 0 for each resource row. +- Scope `%rowIndex` to the nearest enclosing iteration directive (`repeat`, `forEach`, or `forEachOrNull`), so nested directives maintain independent indices. + +**Non-Goals:** + +- Changing how `%rowIndex` works for `forEach`/`forEachOrNull` (already implemented). + +## Decisions + +### Use a stateful counter expression for global traversal-order indexing + +**Decision:** Introduce a `RowIndexCounter` class (thread-safe via `ThreadLocal`) and two Spark expressions — `RowCounter` (returns current value and increments) and `ResetCounter` (resets to 0 before evaluating its child). The counter is shared across the entire `transformTree` invocation, producing a monotonically increasing sequence across all depth levels and branches. + +**Rationale:** Unlike `forEach` where Spark's indexed `transform` provides per-element indices natively, `transformTree` concatenates results from multiple recursive levels. No single Spark `transform` call sees the full flattened output. A stateful counter that increments on each element evaluation is the simplest way to produce a global traversal-order index. + +**Alternatives considered:** + +- _Post-hoc indexing with `posexplode`_: Explode the final array with position indices. Rejected because the array is already embedded in a column expression pipeline — adding a dataset-level operation would require restructuring the projection architecture. +- _Pre-stamping indices into the tree_: Wrap each element with its index before `transformTree`. Rejected because the total count across levels is not known until traversal completes, and the breadth-first concatenation order makes pre-computation complex. + +### Thread-safety via ThreadLocal + +**Decision:** `RowIndexCounter` uses `ThreadLocal` for its mutable state. The class is `Serializable` with a transient `ThreadLocal` field that is lazily re-initialized after deserialization. + +**Rationale:** Spark tasks run in parallel across partitions on different threads. `ThreadLocal` ensures each partition's task thread gets an independent counter, preventing cross-partition interference. The `int[]` wrapper avoids boxing overhead. Lazy re-initialization handles the case where the counter is serialized to an executor and deserialized in a new JVM. + +### Inject counter via ProjectionContext.withRowIndex() + +**Decision:** `RepeatSelection` creates a `RowIndexCounter`, wraps it in a `RowCounter` column, and injects it into the `ProjectionContext` via the existing `withRowIndex()` method before building the `transformTree` expression. The final result is wrapped with `ResetCounter` to ensure the counter resets for each resource row. + +**Rationale:** This reuses the same mechanism that `UnnestingSelection` uses for `forEach` — the `rowIndex` field on `ProjectionContext` is already threaded into `evalExpression()` and resolved as the `%rowIndex` variable. The only difference is the source of the index column: Spark's indexed transform lambda vs. a stateful counter expression. + +**Scoping:** When a `forEach` is nested inside a `repeat`, the inner `UnnestingSelection` calls `withRowIndex(index)` with its own transform-provided index, naturally shadowing the outer `repeat`'s counter. Conversely, a `repeat` nested inside a `forEach` would create its own `RowIndexCounter`, independent of the outer scope. + +### Place RowCounter/ResetCounter in the encoders module + +**Decision:** The `RowCounter` and `ResetCounter` Spark expressions, along with the `RowIndexCounter` state class, are placed in the `encoders` module alongside other custom Spark expressions (`Expressions.scala`, `ValueFunctions.java`). + +**Rationale:** The `encoders` module already contains all custom Spark Catalyst expressions (e.g., `TransformTree`, `PruneSyntheticFields`). `RowCounter` and `ResetCounter` are general-purpose Spark expressions that could potentially be reused beyond `repeat`. Convenience methods are added to `ValueFunctions` following the existing pattern. + +## Risks / Trade-offs + +**[Risk] Evaluation order determinism** — The counter relies on deterministic evaluation order within `transformTree`. If Spark were to evaluate elements in a non-deterministic order, indices would be unpredictable. +→ **Mitigation:** `transformTree` uses `Concat` of `transform` calls, both of which preserve array order. Spark's higher-order functions evaluate elements sequentially within a single row. The spike's tests confirm deterministic ordering. + +**[Risk] Single-partition constraint in tests** — The spike's encoder unit tests use `.repartition(1)` to ensure deterministic evaluation order across rows. This is a test-level constraint, not a runtime limitation — in production, each partition processes its rows independently and the counter resets per row via `ResetCounter`. +→ **Mitigation:** Document this constraint in test comments. The `ResetCounter` ensures correctness regardless of partitioning. + +**[Risk] Codegen compatibility** — `RowCounter` extends `Nondeterministic` and implements `doGenCode` for Spark's whole-stage code generation. If the codegen path diverges from the interpreted path, indices could be incorrect. +→ **Mitigation:** The `ExpressionsBothModesTest` base class runs all encoder tests in both interpreted and codegen modes, catching any divergence. diff --git a/openspec/changes/archive/2026-04-01-repeat-row-index/proposal.md b/openspec/changes/archive/2026-04-01-repeat-row-index/proposal.md new file mode 100644 index 0000000000..e3f6d28942 --- /dev/null +++ b/openspec/changes/archive/2026-04-01-repeat-row-index/proposal.md @@ -0,0 +1,27 @@ +## Why + +The `%rowIndex` environment variable is currently implemented for `forEach` and `forEachOrNull` ViewDefinition directives but not for `repeat`. The `repeat` directive flattens recursive structures (e.g., nested extensions, Questionnaire items) into rows, and users need `%rowIndex` to preserve ordering, disambiguate elements, and construct surrogate keys — the same use cases that motivated `%rowIndex` for `forEach`. This completes the `%rowIndex` implementation across all iteration directives as defined in the SQL on FHIR ViewDefinition spec. + +## What Changes + +- `%rowIndex` resolves to a 0-based global traversal-order index within `repeat` iterations, treating the entire flattened recursive tree as the collection being iterated. +- The counter resets to 0 for each resource row. +- Each `repeat` directive scopes its own `%rowIndex`, independent of enclosing or nested `forEach`/`forEachOrNull`/`repeat` directives. +- A stateful counter mechanism (`RowIndexCounter`) is introduced at the Spark expression level to track element positions across tree depth levels and traversal branches. + +## Capabilities + +### New Capabilities + +(none) + +### Modified Capabilities + +- `row-index-variable`: Add requirements for `%rowIndex` within `repeat` directives, including global traversal-order semantics, per-resource reset, and scoping rules for nested `repeat`. + +## Impact + +- `encoders` module: New Spark expressions (`RowCounter`, `ResetCounter`) and supporting `RowIndexCounter` class. +- `fhirpath` module: `RepeatSelection` injects the row index counter into `ProjectionContext` and wraps output with counter reset. +- ViewDefinition test suite: New `%rowIndex` + `repeat` test cases added to `rowindex.json` (alongside existing forEach/forEachOrNull tests), with additional test resource data for recursively nested extensions. +- Encoder unit tests: New tests in `ExpressionsBothModesTest` for counter behaviour with `transform`, `transformTree`, and arithmetic composition. diff --git a/openspec/changes/archive/2026-04-01-repeat-row-index/specs/row-index-variable/spec.md b/openspec/changes/archive/2026-04-01-repeat-row-index/specs/row-index-variable/spec.md new file mode 100644 index 0000000000..7efcda0440 --- /dev/null +++ b/openspec/changes/archive/2026-04-01-repeat-row-index/specs/row-index-variable/spec.md @@ -0,0 +1,83 @@ +## ADDED Requirements + +### Requirement: %rowIndex resolves to global traversal-order index within repeat + +When a ViewDefinition `select` clause uses `repeat`, the `%rowIndex` environment variable SHALL resolve to the 0-based index of the current element within the flattened collection produced by the recursive traversal. The index reflects the element's position in the complete flattened output (across all depth levels and traversal branches), not its position within a single depth level. + +#### Scenario: Linear repeat with sequential indices + +- **WHEN** a ViewDefinition has `repeat: ["extension"]` and the resource has a chain of 4 nested extensions (each containing one child extension) +- **THEN** `%rowIndex` SHALL be `0` for the first extension, `1` for its child, `2` for the grandchild, and `3` for the great-grandchild + +#### Scenario: Branching repeat with breadth-first indices + +- **WHEN** a ViewDefinition has `repeat: ["extension"]` and the resource has a root extension with 2 child extensions (the first child having 1 grandchild) +- **THEN** the root extension SHALL have `%rowIndex` `0`, its two children SHALL have `%rowIndex` `1` and `2` (in document order), and the grandchild SHALL have `%rowIndex` `3` + +#### Scenario: Repeat with single element + +- **WHEN** a ViewDefinition has `repeat: ["extension"]` and the resource has exactly 1 extension with no nested extensions +- **THEN** `%rowIndex` SHALL be `0` for that extension + +#### Scenario: Repeat with empty collection + +- **WHEN** a ViewDefinition has `repeat: ["extension"]` and the resource has no extensions +- **THEN** no rows are produced, so `%rowIndex` is not evaluated + +### Requirement: %rowIndex resets to 0 for each resource row within repeat + +The `%rowIndex` counter SHALL reset to 0 at the start of each resource row. The index sequence is scoped to a single resource's traversal, not global across the dataset. + +#### Scenario: Counter resets across resources + +- **WHEN** a ViewDefinition has `repeat: ["extension"]` and two resources each have nested extensions +- **THEN** the `%rowIndex` sequence SHALL start at `0` independently for each resource + +### Requirement: repeat scopes its own %rowIndex independently from enclosing and nested directives + +Each `repeat` directive SHALL maintain its own `%rowIndex` scope. A `forEach` or `forEachOrNull` nested inside a `repeat` SHALL have its own independent `%rowIndex`. Likewise, a `repeat` nested inside a `forEach` SHALL have its own independent `%rowIndex`. + +#### Scenario: forEach nested inside repeat has independent %rowIndex + +- **WHEN** a ViewDefinition has a `repeat: ["extension"]` with a nested `forEach: "extension"` inside it +- **THEN** the `repeat` level `%rowIndex` SHALL reflect the global traversal position, and the inner `forEach` `%rowIndex` SHALL reflect the 0-based index within that element's immediate children, independent of the outer repeat index + +#### Scenario: repeat nested inside forEach has independent %rowIndex + +- **WHEN** a ViewDefinition has `forEach: "name"` with a nested `repeat: ["extension"]` inside it +- **THEN** the outer `forEach` `%rowIndex` SHALL reflect the name index, and the inner `repeat` `%rowIndex` SHALL start at `0` for each name's extension traversal + +#### Scenario: repeat nested inside repeat has independent %rowIndex + +- **WHEN** a ViewDefinition has an outer `repeat: ["extension"]` with an inner `repeat: ["extension"]` nested inside it via a `select` +- **THEN** the outer `repeat` `%rowIndex` SHALL reflect the global traversal position in the outer flattened tree, and the inner `repeat` `%rowIndex` SHALL start at `0` independently for each element's nested extension traversal + +### Requirement: %rowIndex supports arithmetic within repeat + +The `%rowIndex` variable within `repeat` iterations SHALL resolve to an integer value compatible with FHIRPath integer type, allowing arithmetic operations. + +#### Scenario: Arithmetic with %rowIndex in repeat + +- **WHEN** a column expression within a `repeat` block is `%rowIndex + 1` +- **THEN** the result SHALL be the 1-based position of the element in the flattened traversal + +## MODIFIED Requirements + +### Requirement: Nested iterations maintain independent %rowIndex values + +Each nesting level of `forEach`/`forEachOrNull`/`repeat` SHALL maintain its own independent `%rowIndex`. An inner iteration directive resets `%rowIndex` to count within its own collection, restoring the outer `%rowIndex` when the inner iteration completes. + +#### Scenario: Nested forEach iterations + +- **WHEN** a ViewDefinition has an outer `forEach: "Patient.name"` (Patient has 2 names) and an inner `forEach: "HumanName.given"` (first name has 2 givens, second name has 1 given) +- **THEN** for the first name: outer `%rowIndex` is `0`, inner `%rowIndex` is `0` and `1` for each given; for the second name: outer `%rowIndex` is `1`, inner `%rowIndex` is `0` for its single given + +#### Scenario: Inner forEach does not affect outer %rowIndex + +- **WHEN** a column expression references `%rowIndex` at the outer forEach level after an inner forEach has completed +- **THEN** the value SHALL reflect the outer iteration index, unaffected by the inner iteration + +#### Scenario: Nested repeat and forEach maintain independent indices + +- **WHEN** a ViewDefinition has `repeat: ["extension"]` containing a nested `forEach: "extension"` +- **THEN** each directive level SHALL maintain its own `%rowIndex`, with the inner `forEach` index being independent of the outer `repeat` index diff --git a/openspec/changes/archive/2026-04-01-repeat-row-index/tasks.md b/openspec/changes/archive/2026-04-01-repeat-row-index/tasks.md new file mode 100644 index 0000000000..8b61973c7f --- /dev/null +++ b/openspec/changes/archive/2026-04-01-repeat-row-index/tasks.md @@ -0,0 +1,18 @@ +## 1. Cherry-pick spike implementation + +- [x] 1.1 Cherry-pick commit `093d39c645` from `spike/repeat_row_index` onto `issue/2560` — this brings in `RowIndexCounter`, `RowCounter`/`ResetCounter` expressions, `ValueFunctions` methods, `RepeatSelection` wiring, encoder unit tests, and initial repeat `%rowIndex` view tests + +## 2. Move and extend ViewDefinition test cases (rowindex.json) + +- [x] 2.1 Move the 3 repeat `%rowIndex` tests from `repeat.json` to `rowindex.json` (remove from `repeat.json`) +- [x] 2.2 Add test resource with recursively nested extensions (linear chain) to `rowindex.json` resources — reuse or adapt the extension structure from `repeat.json` +- [x] 2.3 Add test resource with branching extensions (root extension with 2 children, first child has 1 grandchild) to `rowindex.json` resources +- [x] 2.4 Add test: repeat with `%rowIndex` — branching tree, breadth-first indices (uses branching resource from 2.3) +- [x] 2.5 Add test: repeat with `%rowIndex` across multiple resources — verify counter resets to 0 per resource +- [x] 2.6 Add test: repeat nested inside repeat — independent `%rowIndex` scopes + +## 3. Verification + +- [x] 3.1 Run encoder unit tests (`ExpressionsBothModesTest` subclasses) in both interpreted and codegen modes +- [x] 3.2 Run ViewDefinition test suite (`ViewDefinitionTest`) to verify all `rowindex.json` tests pass +- [x] 3.3 Run existing `repeat.json` tests to verify no regressions diff --git a/openspec/specs/row-index-variable/spec.md b/openspec/specs/row-index-variable/spec.md new file mode 100644 index 0000000000..ef045252f4 --- /dev/null +++ b/openspec/specs/row-index-variable/spec.md @@ -0,0 +1,146 @@ +## ADDED Requirements + +### Requirement: %rowIndex resolves to element index within forEach + +When a ViewDefinition `select` clause uses `forEach`, the `%rowIndex` environment variable SHALL resolve to the 0-based index of the current element within the collection produced by the `forEach` expression. The index reflects the element's position in the collection as evaluated by the FHIRPath expression, starting at 0 for the first element. + +#### Scenario: Single forEach with multiple elements + +- **WHEN** a ViewDefinition has `forEach: "Patient.name"` and the Patient has 3 names +- **THEN** `%rowIndex` SHALL be `0` for the first name, `1` for the second, and `2` for the third + +#### Scenario: forEach with single element + +- **WHEN** a ViewDefinition has `forEach: "Patient.name"` and the Patient has 1 name +- **THEN** `%rowIndex` SHALL be `0` for that name + +#### Scenario: forEach with empty collection + +- **WHEN** a ViewDefinition has `forEach: "Patient.name"` and the Patient has no names +- **THEN** no rows are produced (forEach produces no output for empty collections), so `%rowIndex` is not evaluated + +### Requirement: %rowIndex resolves to element index within forEachOrNull + +When a ViewDefinition `select` clause uses `forEachOrNull`, the `%rowIndex` environment variable SHALL resolve to the 0-based index of the current element within the collection produced by the `forEachOrNull` expression, following the same indexing rules as `forEach`. + +#### Scenario: forEachOrNull with multiple elements + +- **WHEN** a ViewDefinition has `forEachOrNull: "Patient.name"` and the Patient has 2 names +- **THEN** `%rowIndex` SHALL be `0` for the first name and `1` for the second + +#### Scenario: forEachOrNull with empty collection + +- **WHEN** a ViewDefinition has `forEachOrNull: "Patient.name"` and the Patient has no names +- **THEN** one row is produced with null values for all nested columns including `%rowIndex` + +### Requirement: %rowIndex defaults to 0 at top level + +When no `forEach` or `forEachOrNull` iteration is active (i.e. the expression is evaluated at the top level of a ViewDefinition select), `%rowIndex` SHALL evaluate to `0`. + +#### Scenario: Top-level column referencing %rowIndex + +- **WHEN** a ViewDefinition `select` has a column with expression `%rowIndex` and no `forEach` or `forEachOrNull` is active +- **THEN** the column value SHALL be `0` for every resource row + +### Requirement: %rowIndex resolves to global traversal-order index within repeat + +When a ViewDefinition `select` clause uses `repeat`, the `%rowIndex` environment variable SHALL resolve to the 0-based index of the current element within the flattened collection produced by the recursive traversal. The index reflects the element's position in the complete flattened output (across all depth levels and traversal branches), not its position within a single depth level. + +#### Scenario: Linear repeat with sequential indices + +- **WHEN** a ViewDefinition has `repeat: ["extension"]` and the resource has a chain of 4 nested extensions (each containing one child extension) +- **THEN** `%rowIndex` SHALL be `0` for the first extension, `1` for its child, `2` for the grandchild, and `3` for the great-grandchild + +#### Scenario: Branching repeat with breadth-first indices + +- **WHEN** a ViewDefinition has `repeat: ["extension"]` and the resource has a root extension with 2 child extensions (the first child having 1 grandchild) +- **THEN** the root extension SHALL have `%rowIndex` `0`, its two children SHALL have `%rowIndex` `1` and `2` (in document order), and the grandchild SHALL have `%rowIndex` `3` + +#### Scenario: Repeat with single element + +- **WHEN** a ViewDefinition has `repeat: ["extension"]` and the resource has exactly 1 extension with no nested extensions +- **THEN** `%rowIndex` SHALL be `0` for that extension + +#### Scenario: Repeat with empty collection + +- **WHEN** a ViewDefinition has `repeat: ["extension"]` and the resource has no extensions +- **THEN** no rows are produced, so `%rowIndex` is not evaluated + +### Requirement: %rowIndex resets to 0 for each resource row within repeat + +The `%rowIndex` counter SHALL reset to 0 at the start of each resource row. The index sequence is scoped to a single resource's traversal, not global across the dataset. + +#### Scenario: Counter resets across resources + +- **WHEN** a ViewDefinition has `repeat: ["extension"]` and two resources each have nested extensions +- **THEN** the `%rowIndex` sequence SHALL start at `0` independently for each resource + +### Requirement: repeat scopes its own %rowIndex independently from enclosing and nested directives + +Each `repeat` directive SHALL maintain its own `%rowIndex` scope. A `forEach` or `forEachOrNull` nested inside a `repeat` SHALL have its own independent `%rowIndex`. Likewise, a `repeat` nested inside a `forEach` SHALL have its own independent `%rowIndex`. + +#### Scenario: forEach nested inside repeat has independent %rowIndex + +- **WHEN** a ViewDefinition has a `repeat: ["extension"]` with a nested `forEach: "extension"` inside it +- **THEN** the `repeat` level `%rowIndex` SHALL reflect the global traversal position, and the inner `forEach` `%rowIndex` SHALL reflect the 0-based index within that element's immediate children, independent of the outer repeat index + +#### Scenario: repeat nested inside forEach has independent %rowIndex + +- **WHEN** a ViewDefinition has `forEach: "name"` with a nested `repeat: ["extension"]` inside it +- **THEN** the outer `forEach` `%rowIndex` SHALL reflect the name index, and the inner `repeat` `%rowIndex` SHALL start at `0` for each name's extension traversal + +#### Scenario: repeat nested inside repeat has independent %rowIndex + +- **WHEN** a ViewDefinition has an outer `repeat: ["extension"]` with an inner `repeat: ["extension"]` nested inside it via a `select` +- **THEN** the outer `repeat` `%rowIndex` SHALL reflect the global traversal position in the outer flattened tree, and the inner `repeat` `%rowIndex` SHALL start at `0` independently for each element's nested extension traversal + +### Requirement: %rowIndex supports arithmetic within repeat + +The `%rowIndex` variable within `repeat` iterations SHALL resolve to an integer value compatible with FHIRPath integer type, allowing arithmetic operations. + +#### Scenario: Arithmetic with %rowIndex in repeat + +- **WHEN** a column expression within a `repeat` block is `%rowIndex + 1` +- **THEN** the result SHALL be the 1-based position of the element in the flattened traversal + +### Requirement: Nested iterations maintain independent %rowIndex values + +Each nesting level of `forEach`/`forEachOrNull`/`repeat` SHALL maintain its own independent `%rowIndex`. An inner iteration directive resets `%rowIndex` to count within its own collection, restoring the outer `%rowIndex` when the inner iteration completes. + +#### Scenario: Nested forEach iterations + +- **WHEN** a ViewDefinition has an outer `forEach: "Patient.name"` (Patient has 2 names) and an inner `forEach: "HumanName.given"` (first name has 2 givens, second name has 1 given) +- **THEN** for the first name: outer `%rowIndex` is `0`, inner `%rowIndex` is `0` and `1` for each given; for the second name: outer `%rowIndex` is `1`, inner `%rowIndex` is `0` for its single given + +#### Scenario: Inner forEach does not affect outer %rowIndex + +- **WHEN** a column expression references `%rowIndex` at the outer forEach level after an inner forEach has completed +- **THEN** the value SHALL reflect the outer iteration index, unaffected by the inner iteration + +#### Scenario: Nested repeat and forEach maintain independent indices + +- **WHEN** a ViewDefinition has `repeat: ["extension"]` containing a nested `forEach: "extension"` +- **THEN** each directive level SHALL maintain its own `%rowIndex`, with the inner `forEach` index being independent of the outer `repeat` index + +### Requirement: %rowIndex is available in nested select expressions + +The `%rowIndex` variable SHALL be accessible from any FHIRPath expression evaluated within the scope of the current iteration, including columns within nested `select` clauses that do not themselves introduce a new `forEach`/`forEachOrNull`. + +#### Scenario: Column in nested select without its own forEach + +- **WHEN** a `forEach` iterates over `Patient.name` and a nested `select` (without its own `forEach`) contains a column with expression `%rowIndex` +- **THEN** the column SHALL resolve to the index from the enclosing `forEach` + +### Requirement: %rowIndex is an integer type + +The `%rowIndex` variable SHALL resolve to an integer value compatible with FHIRPath integer type, allowing arithmetic operations and comparisons. + +#### Scenario: Arithmetic with %rowIndex + +- **WHEN** a column expression is `%rowIndex + 1` +- **THEN** the result SHALL be the 1-based index of the current element + +#### Scenario: Comparison with %rowIndex + +- **WHEN** a `where` clause filters with `%rowIndex = 0` +- **THEN** only the first element of the iterated collection SHALL be included