From 5ca890ac1a556b2759c3ba74f264612ed7acbb27 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Fri, 6 Mar 2026 18:31:59 -0800 Subject: [PATCH 01/53] python(feature): sift_client low level wrapper for exports --- .../_internal/low_level_wrappers/exports.py | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 python/lib/sift_client/_internal/low_level_wrappers/exports.py diff --git a/python/lib/sift_client/_internal/low_level_wrappers/exports.py b/python/lib/sift_client/_internal/low_level_wrappers/exports.py new file mode 100644 index 000000000..9c1c43970 --- /dev/null +++ b/python/lib/sift_client/_internal/low_level_wrappers/exports.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, cast + +from sift.exports.v1.exports_pb2 import ( + ExportDataRequest, + ExportDataResponse, + GetDownloadUrlRequest, + GetDownloadUrlResponse, +) +from sift.exports.v1.exports_pb2_grpc import ExportServiceStub + +from sift_client._internal.low_level_wrappers.base import LowLevelClientBase +from sift_client.transport import WithGrpcClient + +if TYPE_CHECKING: + from sift_client.transport.grpc_transport import GrpcClient + + +class ExportsLowLevelClient(LowLevelClientBase, WithGrpcClient): + """Low-level client for the ExportsAPI. + + This class provides a thin wrapper around the autogenerated gRPC bindings for the ExportsAPI. + """ + + def __init__(self, grpc_client: GrpcClient): + """Initialize the ExportsLowLevelClient. + + Args: + grpc_client: The gRPC client to use for making API calls. + """ + super().__init__(grpc_client) + + async def export_data(self, request: ExportDataRequest) -> ExportDataResponse: + """Initiate a data export. + + Returns a presigned_url if the export completes immediately, or a job_id + if it's processed in the background. Use get_download_url() to retrieve + the URL for background jobs. + + Args: + request: The ExportDataRequest proto message. 
+ + Returns: + The ExportDataResponse containing either a presigned_url or a job_id. + """ + response = await self._grpc_client.get_stub(ExportServiceStub).ExportData(request) + return cast("ExportDataResponse", response) + + async def get_download_url(self, job_id: str) -> str: + """Get the download URL for a background export job. + + If the job is still processing, the server will return an error. + Polling/retry logic should be handled. + + Args: + job_id: The job ID returned from export_data(). + + Returns: + The presigned URL to download the exported zip file. + """ + request = GetDownloadUrlRequest(job_id=job_id) + response = await self._grpc_client.get_stub(ExportServiceStub).GetDownloadUrl(request) + response = cast("GetDownloadUrlResponse", response) + return response.presigned_url From a083f1d739c240a0eaf9a53542e60d0c17c32066 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Tue, 10 Mar 2026 10:35:28 -0700 Subject: [PATCH 02/53] python(feat): data export api for sift_client --- python/lib/sift_client/client.py | 8 + python/lib/sift_client/resources/__init__.py | 4 + python/lib/sift_client/resources/exports.py | 325 ++++++++++++++++++ .../resources/sync_stubs/__init__.py | 3 + .../resources/sync_stubs/__init__.pyi | 178 ++++++++++ python/lib/sift_client/sift_types/export.py | 66 ++++ python/lib/sift_client/util/util.py | 4 + 7 files changed, 588 insertions(+) create mode 100644 python/lib/sift_client/resources/exports.py create mode 100644 python/lib/sift_client/sift_types/export.py diff --git a/python/lib/sift_client/client.py b/python/lib/sift_client/client.py index ae3302673..dcb07c175 100644 --- a/python/lib/sift_client/client.py +++ b/python/lib/sift_client/client.py @@ -7,6 +7,8 @@ CalculatedChannelsAPIAsync, ChannelsAPI, ChannelsAPIAsync, + ExportsAPI, + ExportsAPIAsync, FileAttachmentsAPI, FileAttachmentsAPIAsync, IngestionAPIAsync, @@ -101,9 +103,13 @@ class SiftClient( tags: TagsAPI """Instance of the Tags API for making synchronous requests.""" + 
test_results: TestResultsAPI """Instance of the Test Results API for making synchronous requests.""" + exports: ExportsAPI + """Instance of the Exports API for making synchronous requests.""" + async_: AsyncAPIs """Accessor for the asynchronous APIs. All asynchronous APIs are available as attributes on this accessor.""" @@ -152,6 +158,7 @@ def __init__( self.runs = RunsAPI(self) self.tags = TagsAPI(self) self.test_results = TestResultsAPI(self) + self.exports = ExportsAPI(self) # Accessor for the asynchronous APIs self.async_ = AsyncAPIs( @@ -167,6 +174,7 @@ def __init__( runs=RunsAPIAsync(self), tags=TagsAPIAsync(self), test_results=TestResultsAPIAsync(self), + exports=ExportsAPIAsync(self), ) @property diff --git a/python/lib/sift_client/resources/__init__.py b/python/lib/sift_client/resources/__init__.py index af9fe5e31..011f6af84 100644 --- a/python/lib/sift_client/resources/__init__.py +++ b/python/lib/sift_client/resources/__init__.py @@ -162,6 +162,7 @@ async def main(): from sift_client.resources.runs import RunsAPIAsync from sift_client.resources.tags import TagsAPIAsync from sift_client.resources.test_results import TestResultsAPIAsync +from sift_client.resources.exports import ExportsAPIAsync # ruff: noqa All imports needs to be imported before sync_stubs to avoid circular import from sift_client.resources.sync_stubs import ( @@ -176,6 +177,7 @@ async def main(): TagsAPI, TestResultsAPI, FileAttachmentsAPI, + ExportsAPI, ) import sys @@ -211,4 +213,6 @@ async def main(): "TestResultsAPI", "TestResultsAPIAsync", "TracingConfig", + "ExportsAPI", + "ExportsAPIAsync", ] diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py new file mode 100644 index 000000000..3b66abfc6 --- /dev/null +++ b/python/lib/sift_client/resources/exports.py @@ -0,0 +1,325 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from sift.calculated_channels.v2.calculated_channels_pb2 import ( + 
CalculatedChannelAbstractChannelReference, +) +from sift.exports.v1.exports_pb2 import ( + AssetsAndTimeRange, + CalculatedChannelConfig, + ExportDataRequest, + ExportOptions, + RunsAndTimeRange, + TimeRange, +) + +from sift_client._internal.low_level_wrappers.exports import ExportsLowLevelClient +from sift_client._internal.util.timestamp import to_pb_timestamp +from sift_client.resources._base import ResourceBase +from sift_client.sift_types.export import ExportCalculatedChannel, ExportOutputFormat # noqa: TC001 + +if TYPE_CHECKING: + from datetime import datetime + + from sift_client.client import SiftClient + + +def _build_calc_configs( + calculated_channel_configs: list[ExportCalculatedChannel] | None, +) -> list[CalculatedChannelConfig] | None: + """Convert CalculatedChannel Pydantic models to proto CalculatedChannelConfig messages.""" + if not calculated_channel_configs: + return None + return [ + CalculatedChannelConfig( + name=cc.name, + expression=cc.expression, + channel_references=[ + CalculatedChannelAbstractChannelReference( + channel_reference=ref.channel_reference, + channel_identifier=ref.channel_identifier, + ) + for ref in cc.channel_references + ], + units=cc.units, + ) + for cc in calculated_channel_configs + ] + + +class ExportsAPIAsync(ResourceBase): + """High-level API for exporting data from Sift. + + Provides three export methods based on how you want to scope the data: + + - ``export_by_run`` - Export data from one or more runs. + - ``export_by_asset`` - Export data from one or more assets within a time range. + - ``export_by_time_range`` - Export data within a time range (requires channel_ids or calculated_channel_configs). + + Each method handles the full export lifecycle: initiating the export, polling for + completion (if async), and returning the download URL. 
+ + Example:: + + from sift_client.sift_types.export import ExportOutputFormat + + # Export by run + url = await client.async_.exports.export_by_run( + run_ids=["run-id-1"], + output_format=ExportOutputFormat.CSV, + ) + + # Export by asset with time range + url = await client.async_.exports.export_by_asset( + asset_ids=["asset-id-1"], + start_time=start, + stop_time=stop, + output_format=ExportOutputFormat.CSV, + ) + """ + + def __init__(self, sift_client: SiftClient): + """Initialize the ExportsAPI. + + Args: + sift_client: The Sift client to use. + """ + super().__init__(sift_client) + self._low_level_client = ExportsLowLevelClient(grpc_client=self._sift_client.grpc_client) + + async def export_by_run( + self, + *, + run_ids: list[str], + output_format: ExportOutputFormat, + start_time: datetime | None = None, + stop_time: datetime | None = None, + channel_ids: list[str] | None = None, + calculated_channel_configs: list[ExportCalculatedChannel] | None = None, + use_legacy_format: bool = False, + simplify_channel_names: bool = False, + combine_runs: bool = False, + split_export_by_asset: bool = False, + split_export_by_run: bool = False, + polling_interval_secs: int = 5, + timeout_secs: int | None = None, + ) -> str: + """Export data scoped by one or more runs. + + If no start_time/stop_time are provided, the full time range of each run is used. + If no channel_ids or calculated_channel_configs are provided, all channels from + the run's assets are included. + + Args: + run_ids: One or more run IDs to export data from. + output_format: The file format for the export (CSV or SUN). + start_time: Optional start time to narrow the export within the run(s). + stop_time: Optional stop time to narrow the export within the run(s). + channel_ids: Optional list of channel IDs to include. If omitted, all channels are exported. + calculated_channel_configs: Optional inline calculated channels to include in the export. 
+ use_legacy_format: Use legacy key-value metadata format for channel headers. + simplify_channel_names: Remove the component part of channel names if unique in the export. + combine_runs: Combine channels from the same asset across different runs into a single column. + split_export_by_asset: Split each asset into its own export file. + split_export_by_run: Split each run into its own export file. + polling_interval_secs: Seconds between status polls for async exports. Defaults to 5. + timeout_secs: Maximum seconds to wait for async exports. None means wait indefinitely. + + Returns: + A presigned download URL for the exported zip file. + + Raises: + TimeoutError: If the export job does not complete within timeout_secs. + """ + runs_and_time_range = RunsAndTimeRange(run_ids=run_ids) + if start_time: + runs_and_time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) + if stop_time: + runs_and_time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) + + calc_configs = _build_calc_configs(calculated_channel_configs) + + export_options = ExportOptions( + use_legacy_format=use_legacy_format, + simplify_channel_names=simplify_channel_names, + combine_runs=combine_runs, + split_export_by_asset=split_export_by_asset, + split_export_by_run=split_export_by_run, + ) + + request = ExportDataRequest( + runs_and_time_range=runs_and_time_range, + output_format=output_format.value, + export_options=export_options, + channel_ids=channel_ids or [], + calculated_channel_configs=calc_configs or [], + ) + + response = await self._low_level_client.export_data(request=request) + + if response.presigned_url: + return response.presigned_url + return await self._await_download_url( + job_id=response.job_id, + polling_interval_secs=polling_interval_secs, + timeout_secs=timeout_secs, + ) + + async def export_by_asset( + self, + *, + asset_ids: list[str], + start_time: datetime, + stop_time: datetime, + output_format: ExportOutputFormat, + channel_ids: list[str] | None = None, + 
calculated_channel_configs: list[ExportCalculatedChannel] | None = None, + use_legacy_format: bool = False, + simplify_channel_names: bool = False, + combine_runs: bool = False, + split_export_by_asset: bool = False, + split_export_by_run: bool = False, + polling_interval_secs: int = 5, + timeout_secs: int | None = None, + ) -> str: + """Export data scoped by one or more assets within a time range. + + Both start_time and stop_time are required. If no channel_ids or + calculated_channel_configs are provided, all channels from the assets are included. + + Args: + asset_ids: One or more asset IDs to export data from. + start_time: Start of the time range to export. + stop_time: End of the time range to export. + output_format: The file format for the export (CSV or SUN). + channel_ids: Optional list of channel IDs to include. If omitted, all channels are exported. + calculated_channel_configs: Optional inline calculated channels to include in the export. + use_legacy_format: Use legacy key-value metadata format for channel headers. + simplify_channel_names: Remove the component part of channel names if unique in the export. + combine_runs: Combine channels from the same asset across different runs into a single column. + split_export_by_asset: Split each asset into its own export file. + split_export_by_run: Split each run into its own export file. + polling_interval_secs: Seconds between status polls for async exports. Defaults to 5. + timeout_secs: Maximum seconds to wait for async exports. None means wait indefinitely. + + Returns: + A presigned download URL for the exported zip file. + + Raises: + TimeoutError: If the export job does not complete within timeout_secs. 
+ """ + assets_and_time_range = AssetsAndTimeRange(asset_ids=asset_ids) + assets_and_time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) + assets_and_time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) + + calc_configs = _build_calc_configs(calculated_channel_configs) + + export_options = ExportOptions( + use_legacy_format=use_legacy_format, + simplify_channel_names=simplify_channel_names, + combine_runs=combine_runs, + split_export_by_asset=split_export_by_asset, + split_export_by_run=split_export_by_run, + ) + + request = ExportDataRequest( + assets_and_time_range=assets_and_time_range, + channel_ids=channel_ids or [], + calculated_channel_configs=calc_configs or [], + output_format=output_format.value, + export_options=export_options, + ) + + response = await self._low_level_client.export_data(request=request) + + if response.presigned_url: + return response.presigned_url + return await self._await_download_url( + job_id=response.job_id, + polling_interval_secs=polling_interval_secs, + timeout_secs=timeout_secs, + ) + + async def export_by_time_range( + self, + *, + start_time: datetime, + stop_time: datetime, + output_format: ExportOutputFormat, + channel_ids: list[str] | None = None, + calculated_channel_configs: list[ExportCalculatedChannel] | None = None, + use_legacy_format: bool = False, + simplify_channel_names: bool = False, + combine_runs: bool = False, + split_export_by_asset: bool = False, + split_export_by_run: bool = False, + polling_interval_secs: int = 5, + timeout_secs: int | None = None, + ) -> str: + """Export data within a time range. + + Both start_time and stop_time are required. Unlike the other export methods, + channel_ids or calculated_channel_configs should be provided to scope the data, + since there are no runs or assets to infer channels from. + + Args: + start_time: Start of the time range to export. + stop_time: End of the time range to export. + output_format: The file format for the export (CSV or SUN). 
+ channel_ids: List of channel IDs to include in the export. + calculated_channel_configs: Optional inline calculated channels to include in the export. + use_legacy_format: Use legacy key-value metadata format for channel headers. + simplify_channel_names: Remove the component part of channel names if unique in the export. + combine_runs: Combine channels from the same asset across different runs into a single column. + split_export_by_asset: Split each asset into its own export file. + split_export_by_run: Split each run into its own export file. + polling_interval_secs: Seconds between status polls for async exports. Defaults to 5. + timeout_secs: Maximum seconds to wait for async exports. None means wait indefinitely. + + Returns: + A presigned download URL for the exported zip file. + + Raises: + TimeoutError: If the export job does not complete within timeout_secs. + """ + time_range = TimeRange() + time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) + time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) + + calc_configs = _build_calc_configs(calculated_channel_configs) + + export_options = ExportOptions( + use_legacy_format=use_legacy_format, + simplify_channel_names=simplify_channel_names, + combine_runs=combine_runs, + split_export_by_asset=split_export_by_asset, + split_export_by_run=split_export_by_run, + ) + + request = ExportDataRequest( + time_range=time_range, + channel_ids=channel_ids or [], + calculated_channel_configs=calc_configs or [], + output_format=output_format.value, + export_options=export_options, + ) + + response = await self._low_level_client.export_data(request=request) + + if response.presigned_url: + return response.presigned_url + return await self._await_download_url( + job_id=response.job_id, + polling_interval_secs=polling_interval_secs, + timeout_secs=timeout_secs, + ) + + async def _await_download_url( + self, job_id: str, polling_interval_secs: int = 5, timeout_secs: int | None = None + ) -> str: + # jobs api 
handles polling loop, timeout, and status checks + await self.client.async_.jobs.wait_until_complete( + job=job_id, polling_interval_secs=polling_interval_secs, timeout_secs=timeout_secs + ) + return await self._low_level_client.get_download_url(job_id=job_id) diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.py b/python/lib/sift_client/resources/sync_stubs/__init__.py index 3f6cc427c..11cf22e6b 100644 --- a/python/lib/sift_client/resources/sync_stubs/__init__.py +++ b/python/lib/sift_client/resources/sync_stubs/__init__.py @@ -7,6 +7,7 @@ AssetsAPIAsync, CalculatedChannelsAPIAsync, ChannelsAPIAsync, + ExportsAPIAsync, FileAttachmentsAPIAsync, JobsAPIAsync, PingAPIAsync, @@ -28,11 +29,13 @@ ReportsAPI = generate_sync_api(ReportsAPIAsync, "ReportsAPI") TagsAPI = generate_sync_api(TagsAPIAsync, "TagsAPI") TestResultsAPI = generate_sync_api(TestResultsAPIAsync, "TestResultsAPI") +ExportsAPI = generate_sync_api(ExportsAPIAsync, "ExportsAPI") __all__ = [ "AssetsAPI", "CalculatedChannelsAPI", "ChannelsAPI", + "ExportsAPI", "FileAttachmentsAPI", "JobsAPI", "PingAPI", diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi index 843a0061f..6b0bbbb0a 100644 --- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi +++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi @@ -21,6 +21,7 @@ if TYPE_CHECKING: CalculatedChannelUpdate, ) from sift_client.sift_types.channel import Channel + from sift_client.sift_types.export import ExportCalculatedChannel, ExportOutputFormat from sift_client.sift_types.file_attachment import ( FileAttachment, FileAttachmentUpdate, @@ -533,6 +534,183 @@ class ChannelsAPI: """ ... +class ExportsAPI: + """Sync counterpart to `ExportsAPIAsync`. + + High-level API for exporting data from Sift. + + Provides three export methods based on how you want to scope the data: + + - ``export_by_run`` - Export data from one or more runs. 
+ - ``export_by_asset`` - Export data from one or more assets within a time range. + - ``export_by_time_range`` - Export data within a time range (requires channel_ids or calculated_channel_configs). + + Each method handles the full export lifecycle: initiating the export, polling for + completion (if async), and returning the download URL. + + Example:: + + from sift_client.sift_types.export import ExportOutputFormat + + # Export by run + url = await client.async_.exports.export_by_run( + run_ids=["run-id-1"], + output_format=ExportOutputFormat.CSV, + ) + + # Export by asset with time range + url = await client.async_.exports.export_by_asset( + asset_ids=["asset-id-1"], + start_time=start, + stop_time=stop, + output_format=ExportOutputFormat.CSV, + ) + """ + + def __init__(self, sift_client: SiftClient): + """Initialize the ExportsAPI. + + Args: + sift_client: The Sift client to use. + """ + ... + + def _run(self, coro): ... + def export_by_asset( + self, + *, + asset_ids: list[str], + start_time: datetime, + stop_time: datetime, + output_format: ExportOutputFormat, + channel_ids: list[str] | None = None, + calculated_channel_configs: list[ExportCalculatedChannel] | None = None, + use_legacy_format: bool = False, + simplify_channel_names: bool = False, + combine_runs: bool = False, + split_export_by_asset: bool = False, + split_export_by_run: bool = False, + polling_interval_secs: int = 5, + timeout_secs: int | None = None, + ) -> str: + """Export data scoped by one or more assets within a time range. + + Both start_time and stop_time are required. If no channel_ids or + calculated_channel_configs are provided, all channels from the assets are included. + + Args: + asset_ids: One or more asset IDs to export data from. + start_time: Start of the time range to export. + stop_time: End of the time range to export. + output_format: The file format for the export (CSV or SUN). + channel_ids: Optional list of channel IDs to include. 
If omitted, all channels are exported. + calculated_channel_configs: Optional inline calculated channels to include in the export. + use_legacy_format: Use legacy key-value metadata format for channel headers. + simplify_channel_names: Remove the component part of channel names if unique in the export. + combine_runs: Combine channels from the same asset across different runs into a single column. + split_export_by_asset: Split each asset into its own export file. + split_export_by_run: Split each run into its own export file. + polling_interval_secs: Seconds between status polls for async exports. Defaults to 5. + timeout_secs: Maximum seconds to wait for async exports. None means wait indefinitely. + + Returns: + A presigned download URL for the exported zip file. + + Raises: + TimeoutError: If the export job does not complete within timeout_secs. + """ + ... + + def export_by_run( + self, + *, + run_ids: list[str], + output_format: ExportOutputFormat, + start_time: datetime | None = None, + stop_time: datetime | None = None, + channel_ids: list[str] | None = None, + calculated_channel_configs: list[ExportCalculatedChannel] | None = None, + use_legacy_format: bool = False, + simplify_channel_names: bool = False, + combine_runs: bool = False, + split_export_by_asset: bool = False, + split_export_by_run: bool = False, + polling_interval_secs: int = 5, + timeout_secs: int | None = None, + ) -> str: + """Export data scoped by one or more runs. + + If no start_time/stop_time are provided, the full time range of each run is used. + If no channel_ids or calculated_channel_configs are provided, all channels from + the run's assets are included. + + Args: + run_ids: One or more run IDs to export data from. + output_format: The file format for the export (CSV or SUN). + start_time: Optional start time to narrow the export within the run(s). + stop_time: Optional stop time to narrow the export within the run(s). + channel_ids: Optional list of channel IDs to include. 
If omitted, all channels are exported. + calculated_channel_configs: Optional inline calculated channels to include in the export. + use_legacy_format: Use legacy key-value metadata format for channel headers. + simplify_channel_names: Remove the component part of channel names if unique in the export. + combine_runs: Combine channels from the same asset across different runs into a single column. + split_export_by_asset: Split each asset into its own export file. + split_export_by_run: Split each run into its own export file. + polling_interval_secs: Seconds between status polls for async exports. Defaults to 5. + timeout_secs: Maximum seconds to wait for async exports. None means wait indefinitely. + + Returns: + A presigned download URL for the exported zip file. + + Raises: + TimeoutError: If the export job does not complete within timeout_secs. + """ + ... + + def export_by_time_range( + self, + *, + start_time: datetime, + stop_time: datetime, + output_format: ExportOutputFormat, + channel_ids: list[str] | None = None, + calculated_channel_configs: list[ExportCalculatedChannel] | None = None, + use_legacy_format: bool = False, + simplify_channel_names: bool = False, + combine_runs: bool = False, + split_export_by_asset: bool = False, + split_export_by_run: bool = False, + polling_interval_secs: int = 5, + timeout_secs: int | None = None, + ) -> str: + """Export data within a time range. + + Both start_time and stop_time are required. Unlike the other export methods, + channel_ids or calculated_channel_configs should be provided to scope the data, + since there are no runs or assets to infer channels from. + + Args: + start_time: Start of the time range to export. + stop_time: End of the time range to export. + output_format: The file format for the export (CSV or SUN). + channel_ids: List of channel IDs to include in the export. + calculated_channel_configs: Optional inline calculated channels to include in the export. 
+ use_legacy_format: Use legacy key-value metadata format for channel headers. + simplify_channel_names: Remove the component part of channel names if unique in the export. + combine_runs: Combine channels from the same asset across different runs into a single column. + split_export_by_asset: Split each asset into its own export file. + split_export_by_run: Split each run into its own export file. + polling_interval_secs: Seconds between status polls for async exports. Defaults to 5. + timeout_secs: Maximum seconds to wait for async exports. None means wait indefinitely. + + Returns: + A presigned download URL for the exported zip file. + + Raises: + TimeoutError: If the export job does not complete within timeout_secs. + """ + ... + class FileAttachmentsAPI: """Sync counterpart to `FileAttachmentsAPIAsync`. diff --git a/python/lib/sift_client/sift_types/export.py b/python/lib/sift_client/sift_types/export.py new file mode 100644 index 000000000..3b39ce5c4 --- /dev/null +++ b/python/lib/sift_client/sift_types/export.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +from enum import Enum + +from pydantic import BaseModel +from sift.exports.v1.exports_pb2 import ExportOutputFormat as ExportOutputFormatProto + + +class ExportOutputFormat(Enum): + """Supported output formats for data exports. + + Attributes: + CSV: Comma-separated values format. + SUN: winplot format. + """ + + CSV = ExportOutputFormatProto.EXPORT_OUTPUT_FORMAT_CSV + SUN = ExportOutputFormatProto.EXPORT_OUTPUT_FORMAT_SUN + + +class ChannelReference(BaseModel): + """Maps a placeholder variable in a calculated channel expression to an actual channel. + + Example:: + + ChannelReference( + channel_reference="velocity", # placeholder used as $velocity in the expression + channel_identifier="vehicle.engine.velocity", # the actual channel + ) + + Attributes: + channel_reference: The placeholder name used in the expression (without the $ prefix). 
+ channel_identifier: The fully qualified channel name or channel ID. + """ + + channel_reference: str + channel_identifier: str + + +class ExportCalculatedChannel(BaseModel): + """An inline calculated channel to include in an export. + + Defines a formula-based channel that is computed at export time from existing channels. + + Example:: + + CalculatedChannel( + name="speed_doubled", + expression="$velocity * 2", + channel_references=[ + ChannelReference(channel_reference="velocity", channel_identifier="vehicle.engine.velocity"), + ], + units="m/s", + ) + + Attributes: + name: Display name for the calculated channel in the export. + expression: The formula to compute, using $placeholder syntax for channel references. + channel_references: Mappings from expression placeholders to actual channels. + units: Optional unit label for the calculated channel. + """ + + name: str + expression: str + channel_references: list[ChannelReference] + units: str | None = None diff --git a/python/lib/sift_client/util/util.py b/python/lib/sift_client/util/util.py index 3800f91a7..bd1c741f3 100644 --- a/python/lib/sift_client/util/util.py +++ b/python/lib/sift_client/util/util.py @@ -7,6 +7,7 @@ AssetsAPIAsync, CalculatedChannelsAPIAsync, ChannelsAPIAsync, + ExportsAPIAsync, FileAttachmentsAPIAsync, IngestionAPIAsync, JobsAPIAsync, @@ -58,6 +59,9 @@ class AsyncAPIs(NamedTuple): test_results: TestResultsAPIAsync """Instance of the Test Results API for making asynchronous requests.""" + exports: ExportsAPIAsync + """Instance of the Exports API for making asynchronous requests.""" + def count_non_none(*args: Any) -> int: """Count the number of non-none arguments.""" From 7da838137189223be17e12fe6c735ae4d4380363 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Tue, 10 Mar 2026 16:14:47 -0700 Subject: [PATCH 03/53] python(fix): updated sift_type export docstrings to handle channel_reference exports --- python/lib/sift_client/sift_types/export.py | 14 +++++++------- 1 file changed, 7 
insertions(+), 7 deletions(-) diff --git a/python/lib/sift_client/sift_types/export.py b/python/lib/sift_client/sift_types/export.py index 3b39ce5c4..22273485a 100644 --- a/python/lib/sift_client/sift_types/export.py +++ b/python/lib/sift_client/sift_types/export.py @@ -24,13 +24,13 @@ class ChannelReference(BaseModel): Example:: ChannelReference( - channel_reference="velocity", # placeholder used as $velocity in the expression - channel_identifier="vehicle.engine.velocity", # the actual channel + channel_reference="$1", # must match the placeholder exactly as it appears in the expression + channel_identifier="cbddaf97-3332-4666-80f2-a19be6a77eef", # channel UUID ) Attributes: - channel_reference: The placeholder name used in the expression (without the $ prefix). - channel_identifier: The fully qualified channel name or channel ID. + channel_reference: The placeholder as it appears in the expression, i.e. $1, $2, etc. + channel_identifier: The channel UUID. """ channel_reference: str @@ -44,11 +44,11 @@ class ExportCalculatedChannel(BaseModel): Example:: - CalculatedChannel( + ExportCalculatedChannel( name="speed_doubled", - expression="$velocity * 2", + expression="$1 * 2", channel_references=[ - ChannelReference(channel_reference="velocity", channel_identifier="vehicle.engine.velocity"), + ChannelReference(channel_reference="$1", channel_identifier=""), ], units="m/s", ) From 07ea61da585cd9c0ff9553337d7c71e52937f1f9 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Wed, 11 Mar 2026 13:18:30 -0700 Subject: [PATCH 04/53] python(fix): enforce channel_ids or calculated_channel_config for export_by_time_range --- python/lib/sift_client/resources/exports.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index 3b66abfc6..934d09135 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -259,16 
+259,16 @@ async def export_by_time_range( ) -> str: """Export data within a time range. - Both start_time and stop_time are required. Unlike the other export methods, - channel_ids or calculated_channel_configs should be provided to scope the data, - since there are no runs or assets to infer channels from. + Both start_time and stop_time are required. At least one of channel_ids or + calculated_channel_configs **must** be provided to scope the data, since there + are no runs or assets to infer channels from. Args: start_time: Start of the time range to export. stop_time: End of the time range to export. output_format: The file format for the export (CSV or SUN). channel_ids: List of channel IDs to include in the export. - calculated_channel_configs: Optional inline calculated channels to include in the export. + calculated_channel_configs: Inline calculated channels to include in the export. use_legacy_format: Use legacy key-value metadata format for channel headers. simplify_channel_names: Remove the component part of channel names if unique in the export. combine_runs: Combine channels from the same asset across different runs into a single column. @@ -281,8 +281,15 @@ async def export_by_time_range( A presigned download URL for the exported zip file. Raises: + ValueError: If neither channel_ids nor calculated_channel_configs is provided. TimeoutError: If the export job does not complete within timeout_secs. """ + if not channel_ids and not calculated_channel_configs: + raise ValueError( + "At least one of 'channel_ids' or 'calculated_channel_configs' must be provided " + "when exporting by time range." 
+ ) + time_range = TimeRange() + time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) + time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) From 7327208163f2661bb26654c55908f64fb152e2dd Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Wed, 11 Mar 2026 17:50:53 -0700 Subject: [PATCH 05/53] python(fix): rename internal grpc module to _grpc to avoid shadowing grpcio --- .../_internal/{grpc => _grpc}/__init__.py | 2 +- .../{grpc => _grpc}/_async_interceptors/__init__.py | 0 .../{grpc => _grpc}/_async_interceptors/base.py | 0 .../{grpc => _grpc}/_async_interceptors/metadata.py | 2 +- .../{grpc => _grpc}/_interceptors/__init__.py | 0 .../_internal/{grpc => _grpc}/_interceptors/base.py | 0 .../{grpc => _grpc}/_interceptors/context.py | 0 .../{grpc => _grpc}/_interceptors/metadata.py | 4 ++-- .../sift_client/_internal/{grpc => _grpc}/_retry.py | 0 .../_internal/{grpc => _grpc}/keepalive.py | 0 .../{grpc => _grpc}/server_interceptors/__init__.py | 0 .../server_interceptors/server_interceptor.py | 0 .../_internal/{grpc => _grpc}/transport.py | 12 ++++++------ .../_internal/{grpc => _grpc}/transport_test.py | 4 ++-- python/lib/sift_client/_internal/rest.py | 2 +- python/lib/sift_client/transport/grpc_transport.py | 2 +- 16 files changed, 14 insertions(+), 14 deletions(-) rename python/lib/sift_client/_internal/{grpc => _grpc}/__init__.py (82%) rename python/lib/sift_client/_internal/{grpc => _grpc}/_async_interceptors/__init__.py (100%) rename python/lib/sift_client/_internal/{grpc => _grpc}/_async_interceptors/base.py (100%) rename python/lib/sift_client/_internal/{grpc => _grpc}/_async_interceptors/metadata.py (91%) rename python/lib/sift_client/_internal/{grpc => _grpc}/_interceptors/__init__.py (100%) rename python/lib/sift_client/_internal/{grpc => _grpc}/_interceptors/base.py (100%) rename python/lib/sift_client/_internal/{grpc => _grpc}/_interceptors/context.py (100%) rename
python/lib/sift_client/_internal/{grpc => _grpc}/_retry.py (100%) rename python/lib/sift_client/_internal/{grpc => _grpc}/keepalive.py (100%) rename python/lib/sift_client/_internal/{grpc => _grpc}/server_interceptors/__init__.py (100%) rename python/lib/sift_client/_internal/{grpc => _grpc}/server_interceptors/server_interceptor.py (100%) rename python/lib/sift_client/_internal/{grpc => _grpc}/transport.py (94%) rename python/lib/sift_client/_internal/{grpc => _grpc}/transport_test.py (97%) diff --git a/python/lib/sift_client/_internal/grpc/__init__.py b/python/lib/sift_client/_internal/_grpc/__init__.py similarity index 82% rename from python/lib/sift_client/_internal/grpc/__init__.py rename to python/lib/sift_client/_internal/_grpc/__init__.py index 738259dc8..85a0bd3ce 100644 --- a/python/lib/sift_client/_internal/grpc/__init__.py +++ b/python/lib/sift_client/_internal/_grpc/__init__.py @@ -4,7 +4,7 @@ Example of establishing a connection to Sift's gRPC APi: ```python -from sift_client._internal.grpc.transport import SiftChannelConfig, use_sift_channel +from sift_client._internal._grpc.transport import SiftChannelConfig, use_sift_channel # Be sure not to include the url scheme i.e. 'https://' in the uri. 
sift_channel_config = SiftChannelConfig(uri=SIFT_BASE_URI, apikey=SIFT_API_KEY) diff --git a/python/lib/sift_client/_internal/grpc/_async_interceptors/__init__.py b/python/lib/sift_client/_internal/_grpc/_async_interceptors/__init__.py similarity index 100% rename from python/lib/sift_client/_internal/grpc/_async_interceptors/__init__.py rename to python/lib/sift_client/_internal/_grpc/_async_interceptors/__init__.py diff --git a/python/lib/sift_client/_internal/grpc/_async_interceptors/base.py b/python/lib/sift_client/_internal/_grpc/_async_interceptors/base.py similarity index 100% rename from python/lib/sift_client/_internal/grpc/_async_interceptors/base.py rename to python/lib/sift_client/_internal/_grpc/_async_interceptors/base.py diff --git a/python/lib/sift_client/_internal/grpc/_async_interceptors/metadata.py b/python/lib/sift_client/_internal/_grpc/_async_interceptors/metadata.py similarity index 91% rename from python/lib/sift_client/_internal/grpc/_async_interceptors/metadata.py rename to python/lib/sift_client/_internal/_grpc/_async_interceptors/metadata.py index 95cc5a925..08e601a95 100644 --- a/python/lib/sift_client/_internal/grpc/_async_interceptors/metadata.py +++ b/python/lib/sift_client/_internal/_grpc/_async_interceptors/metadata.py @@ -4,7 +4,7 @@ from grpc import aio as grpc_aio -from sift_client._internal.grpc._async_interceptors.base import ClientAsyncInterceptor +from sift_client._internal._grpc._async_interceptors.base import ClientAsyncInterceptor Metadata = List[Tuple[str, str]] diff --git a/python/lib/sift_client/_internal/grpc/_interceptors/__init__.py b/python/lib/sift_client/_internal/_grpc/_interceptors/__init__.py similarity index 100% rename from python/lib/sift_client/_internal/grpc/_interceptors/__init__.py rename to python/lib/sift_client/_internal/_grpc/_interceptors/__init__.py diff --git a/python/lib/sift_client/_internal/grpc/_interceptors/base.py b/python/lib/sift_client/_internal/_grpc/_interceptors/base.py similarity 
index 100% rename from python/lib/sift_client/_internal/grpc/_interceptors/base.py rename to python/lib/sift_client/_internal/_grpc/_interceptors/base.py diff --git a/python/lib/sift_client/_internal/grpc/_interceptors/context.py b/python/lib/sift_client/_internal/_grpc/_interceptors/context.py similarity index 100% rename from python/lib/sift_client/_internal/grpc/_interceptors/context.py rename to python/lib/sift_client/_internal/_grpc/_interceptors/context.py diff --git a/python/lib/sift_client/_internal/grpc/_interceptors/metadata.py b/python/lib/sift_client/_internal/_grpc/_interceptors/metadata.py similarity index 81% rename from python/lib/sift_client/_internal/grpc/_interceptors/metadata.py rename to python/lib/sift_client/_internal/_grpc/_interceptors/metadata.py index afb5da50c..fdc4fd223 100644 --- a/python/lib/sift_client/_internal/grpc/_interceptors/metadata.py +++ b/python/lib/sift_client/_internal/_grpc/_interceptors/metadata.py @@ -2,8 +2,8 @@ import grpc -from sift_client._internal.grpc._interceptors.base import ClientInterceptor, Continuation -from sift_client._internal.grpc._interceptors.context import ClientCallDetails +from sift_client._internal._grpc._interceptors.base import ClientInterceptor, Continuation +from sift_client._internal._grpc._interceptors.context import ClientCallDetails Metadata = List[Tuple[str, str]] diff --git a/python/lib/sift_client/_internal/grpc/_retry.py b/python/lib/sift_client/_internal/_grpc/_retry.py similarity index 100% rename from python/lib/sift_client/_internal/grpc/_retry.py rename to python/lib/sift_client/_internal/_grpc/_retry.py diff --git a/python/lib/sift_client/_internal/grpc/keepalive.py b/python/lib/sift_client/_internal/_grpc/keepalive.py similarity index 100% rename from python/lib/sift_client/_internal/grpc/keepalive.py rename to python/lib/sift_client/_internal/_grpc/keepalive.py diff --git a/python/lib/sift_client/_internal/grpc/server_interceptors/__init__.py 
b/python/lib/sift_client/_internal/_grpc/server_interceptors/__init__.py similarity index 100% rename from python/lib/sift_client/_internal/grpc/server_interceptors/__init__.py rename to python/lib/sift_client/_internal/_grpc/server_interceptors/__init__.py diff --git a/python/lib/sift_client/_internal/grpc/server_interceptors/server_interceptor.py b/python/lib/sift_client/_internal/_grpc/server_interceptors/server_interceptor.py similarity index 100% rename from python/lib/sift_client/_internal/grpc/server_interceptors/server_interceptor.py rename to python/lib/sift_client/_internal/_grpc/server_interceptors/server_interceptor.py diff --git a/python/lib/sift_client/_internal/grpc/transport.py b/python/lib/sift_client/_internal/_grpc/transport.py similarity index 94% rename from python/lib/sift_client/_internal/grpc/transport.py rename to python/lib/sift_client/_internal/_grpc/transport.py index 1043245a8..87dc9b3ec 100644 --- a/python/lib/sift_client/_internal/grpc/transport.py +++ b/python/lib/sift_client/_internal/_grpc/transport.py @@ -14,14 +14,14 @@ import grpc.aio as grpc_aio from typing_extensions import NotRequired, TypeAlias -from sift_client._internal.grpc._async_interceptors.metadata import MetadataAsyncInterceptor -from sift_client._internal.grpc._interceptors.metadata import Metadata, MetadataInterceptor +from sift_client._internal._grpc._async_interceptors.metadata import MetadataAsyncInterceptor +from sift_client._internal._grpc._interceptors.metadata import Metadata, MetadataInterceptor if TYPE_CHECKING: - from sift_client._internal.grpc._async_interceptors.base import ClientAsyncInterceptor - from sift_client._internal.grpc._interceptors.base import ClientInterceptor -from sift_client._internal.grpc._retry import RetryPolicy -from sift_client._internal.grpc.keepalive import DEFAULT_KEEPALIVE_CONFIG, KeepaliveConfig + from sift_client._internal._grpc._async_interceptors.base import ClientAsyncInterceptor + from 
sift_client._internal._grpc._interceptors.base import ClientInterceptor +from sift_client._internal._grpc._retry import RetryPolicy +from sift_client._internal._grpc.keepalive import DEFAULT_KEEPALIVE_CONFIG, KeepaliveConfig SiftChannel: TypeAlias = grpc.Channel SiftAsyncChannel: TypeAlias = grpc_aio.Channel diff --git a/python/lib/sift_client/_internal/grpc/transport_test.py b/python/lib/sift_client/_internal/_grpc/transport_test.py similarity index 97% rename from python/lib/sift_client/_internal/grpc/transport_test.py rename to python/lib/sift_client/_internal/_grpc/transport_test.py index efccb6b4e..12e03f78a 100644 --- a/python/lib/sift_client/_internal/grpc/transport_test.py +++ b/python/lib/sift_client/_internal/_grpc/transport_test.py @@ -15,8 +15,8 @@ add_DataServiceServicer_to_server, ) -from sift_client._internal.grpc.server_interceptors.server_interceptor import ServerInterceptor -from sift_client._internal.grpc.transport import SiftChannelConfig, use_sift_channel +from sift_client._internal._grpc.server_interceptors.server_interceptor import ServerInterceptor +from sift_client._internal._grpc.transport import SiftChannelConfig, use_sift_channel class DataService(DataServiceServicer): diff --git a/python/lib/sift_client/_internal/rest.py b/python/lib/sift_client/_internal/rest.py index 5f5c954c3..3c89045c9 100644 --- a/python/lib/sift_client/_internal/rest.py +++ b/python/lib/sift_client/_internal/rest.py @@ -6,7 +6,7 @@ from typing_extensions import NotRequired from urllib3.util import Retry -from sift_client._internal.grpc.transport import _clean_uri +from sift_client._internal._grpc.transport import _clean_uri _DEFAULT_REST_RETRY = Retry(total=3, status_forcelist=[500, 502, 503, 504], backoff_factor=1) diff --git a/python/lib/sift_client/transport/grpc_transport.py b/python/lib/sift_client/transport/grpc_transport.py index 95817a010..ba728980f 100644 --- a/python/lib/sift_client/transport/grpc_transport.py +++ 
b/python/lib/sift_client/transport/grpc_transport.py @@ -13,7 +13,7 @@ from typing import Any from urllib.parse import urlparse -from sift_client._internal.grpc.transport import ( +from sift_client._internal._grpc.transport import ( SiftChannelConfig, use_sift_async_channel, ) From 8748f8cabd2d98d5fde09c39bd8a6efc07136235 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Wed, 11 Mar 2026 17:51:38 -0700 Subject: [PATCH 06/53] python(fix): add input validation and job status checks to export methods --- python/lib/sift_client/resources/exports.py | 26 +++++++++++++++++-- .../resources/sync_stubs/__init__.pyi | 9 ++++--- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index 934d09135..d1a190619 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -132,6 +132,13 @@ async def export_by_run( Raises: TimeoutError: If the export job does not complete within timeout_secs. """ + if not run_ids: + raise ValueError("'run_ids' must be a non-empty list of run IDs.") + if any(not run_id for run_id in run_ids): + raise ValueError("'run_ids' must not contain empty or null values.") + if start_time and stop_time and start_time >= stop_time: + raise ValueError("'start_time' must be before 'stop_time'.") + runs_and_time_range = RunsAndTimeRange(run_ids=run_ids) if start_time: runs_and_time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) @@ -209,6 +216,13 @@ async def export_by_asset( Raises: TimeoutError: If the export job does not complete within timeout_secs. 
""" + if not asset_ids: + raise ValueError("'asset_ids' must be a non-empty list of asset IDs.") + if any(not asset_id for asset_id in asset_ids): + raise ValueError("'asset_ids' must not contain empty or null values.") + if start_time >= stop_time: + raise ValueError("'start_time' must be before 'stop_time'.") + assets_and_time_range = AssetsAndTimeRange(asset_ids=asset_ids) assets_and_time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) assets_and_time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) @@ -289,6 +303,8 @@ async def export_by_time_range( "At least one of 'channel_ids' or 'calculated_channel_configs' must be provided " "when exporting by time range." ) + if start_time >= stop_time: + raise ValueError("'start_time' must be before 'stop_time'.") time_range = TimeRange() time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) @@ -325,8 +341,14 @@ async def export_by_time_range( async def _await_download_url( self, job_id: str, polling_interval_secs: int = 5, timeout_secs: int | None = None ) -> str: - # jobs api handles polling loop, timeout, and status checks - await self.client.async_.jobs.wait_until_complete( + """Poll a background export job until complete, then return the download URL.""" + from sift_client.sift_types.job import JobStatus + + job = await self.client.async_.jobs.wait_until_complete( job=job_id, polling_interval_secs=polling_interval_secs, timeout_secs=timeout_secs ) + if job.job_status == JobStatus.FAILED: + raise RuntimeError(f"Export job '{job_id}' failed.") + if job.job_status == JobStatus.CANCELLED: + raise RuntimeError(f"Export job '{job_id}' was cancelled.") return await self._low_level_client.get_download_url(job_id=job_id) diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi index 6b0bbbb0a..9b2bc1628 100644 --- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi +++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi @@ 
-685,16 +685,16 @@ class ExportsAPI: ) -> str: """Export data within a time range. - Both start_time and stop_time are required. Unlike the other export methods, - channel_ids or calculated_channel_configs should be provided to scope the data, - since there are no runs or assets to infer channels from. + Both start_time and stop_time are required. At least one of channel_ids or + calculated_channel_configs **must** be provided to scope the data, since there + are no runs or assets to infer channels from. Args: start_time: Start of the time range to export. stop_time: End of the time range to export. output_format: The file format for the export (CSV or SUN). channel_ids: List of channel IDs to include in the export. - calculated_channel_configs: Optional inline calculated channels to include in the export. + calculated_channel_configs: Inline calculated channels to include in the export. use_legacy_format: Use legacy key-value metadata format for channel headers. simplify_channel_names: Remove the component part of channel names if unique in the export. combine_runs: Combine channels from the same asset across different runs into a single column. @@ -707,6 +707,7 @@ class ExportsAPI: A presigned download URL for the exported zip file. Raises: + ValueError: If neither channel_ids nor calculated_channel_configs is provided. TimeoutError: If the export job does not complete within timeout_secs. """ ... 
From e04bfae60de3d2ae3ce1f49b6a3a28667ae8b6e4 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Thu, 12 Mar 2026 11:45:20 -0700 Subject: [PATCH 07/53] python(fix): added timestamp checks and unit tests for exports --- .../_tests/resources/test_exports.py | 414 ++++++++++++++++++ python/lib/sift_client/resources/exports.py | 12 +- 2 files changed, 424 insertions(+), 2 deletions(-) create mode 100644 python/lib/sift_client/_tests/resources/test_exports.py diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py new file mode 100644 index 000000000..5318c08c9 --- /dev/null +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -0,0 +1,414 @@ +"""Pytest tests for the Exports API. + +These tests validate the usage of the ExportsAPIAsync including: +- Request construction for all three export methods (by run, asset, time range) +- Synchronous (presigned_url) and asynchronous (job polling) response handling +- Calculated channel config conversion to proto messages +- Input validation and error handling +""" + +from __future__ import annotations + +from datetime import datetime, timezone +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from sift.exports.v1.exports_pb2 import ( + ExportDataResponse, +) +from sift.exports.v1.exports_pb2 import ( + ExportOutputFormat as ExportOutputFormatProto, +) + +from sift_client.resources.exports import ExportsAPIAsync, _build_calc_configs +from sift_client.sift_types.export import ( + ChannelReference, + ExportCalculatedChannel, + ExportOutputFormat, +) +from sift_client.sift_types.job import DataExportStatusDetails, Job, JobStatus + + +@pytest.fixture +def mock_client(): + """Create a mock SiftClient for unit testing.""" + client = MagicMock() + client.grpc_client = MagicMock() + client.async_ = MagicMock() + client.async_.jobs = MagicMock() + return client + + +@pytest.fixture +def exports_api(mock_client): + """Create an 
ExportsAPIAsync with a mocked low-level client.""" + with patch("sift_client.resources.exports.ExportsLowLevelClient", autospec=True) as mock_ll: + api = ExportsAPIAsync(mock_client) + api._low_level_client = mock_ll.return_value + return api + + +@pytest.fixture +def sample_calc_channels(): + """Create sample calculated channel configs for testing.""" + return [ + ExportCalculatedChannel( + name="speed_doubled", + expression="$1 * 2", + channel_references=[ + ChannelReference(channel_reference="$1", channel_identifier="ch-uuid-1"), + ], + units="m/s", + ), + ExportCalculatedChannel( + name="no_units", + expression="$1 + $2", + channel_references=[ + ChannelReference(channel_reference="$1", channel_identifier="ch-uuid-1"), + ChannelReference(channel_reference="$2", channel_identifier="ch-uuid-2"), + ], + ), + ] + + +START = datetime(2025, 1, 1, tzinfo=timezone.utc) +STOP = datetime(2025, 1, 2, tzinfo=timezone.utc) + + +class TestBuildCalcConfigs: + """Tests for the _build_calc_configs helper.""" + + def test_returns_none_for_none(self): + """Test that None input returns None.""" + assert _build_calc_configs(None) is None + + def test_returns_none_for_empty_list(self): + """Test that an empty list returns None.""" + assert _build_calc_configs([]) is None + + def test_converts_to_proto(self, sample_calc_channels): + """Test converting Pydantic models to proto CalculatedChannelConfig messages.""" + result = _build_calc_configs(sample_calc_channels) + assert len(result) == 2 + + first = result[0] + assert first.name == "speed_doubled" + assert first.expression == "$1 * 2" + assert first.units == "m/s" + assert len(first.channel_references) == 1 + assert first.channel_references[0].channel_reference == "$1" + assert first.channel_references[0].channel_identifier == "ch-uuid-1" + + second = result[1] + assert second.name == "no_units" + assert second.units == "" # proto default for unset optional string + assert len(second.channel_references) == 2 + + +class 
TestExportByRun: + """Tests for the export_by_run method.""" + + @pytest.mark.asyncio + async def test_builds_correct_request_and_returns_presigned_url(self, exports_api): + """Test request construction with all parameters and synchronous presigned URL response.""" + exports_api._low_level_client.export_data = AsyncMock( + return_value=ExportDataResponse(presigned_url="https://download.test/run.zip") + ) + + url = await exports_api.export_by_run( + run_ids=["run-1", "run-2"], + output_format=ExportOutputFormat.CSV, + start_time=START, + stop_time=STOP, + channel_ids=["ch-1"], + use_legacy_format=True, + simplify_channel_names=True, + combine_runs=True, + split_export_by_asset=True, + split_export_by_run=True, + ) + + assert url == "https://download.test/run.zip" + req = exports_api._low_level_client.export_data.call_args.kwargs["request"] + assert list(req.runs_and_time_range.run_ids) == ["run-1", "run-2"] + assert req.runs_and_time_range.HasField("start_time") + assert req.runs_and_time_range.HasField("stop_time") + assert list(req.channel_ids) == ["ch-1"] + assert req.output_format == ExportOutputFormatProto.EXPORT_OUTPUT_FORMAT_CSV + assert req.export_options.use_legacy_format is True + assert req.export_options.simplify_channel_names is True + assert req.export_options.combine_runs is True + assert req.export_options.split_export_by_asset is True + assert req.export_options.split_export_by_run is True + + @pytest.mark.asyncio + async def test_minimal_args(self, exports_api): + """Test request construction with only required parameters.""" + exports_api._low_level_client.export_data = AsyncMock( + return_value=ExportDataResponse(presigned_url="https://download.test/min.zip") + ) + + url = await exports_api.export_by_run( + run_ids=["run-1"], + output_format=ExportOutputFormat.SUN, + ) + + assert url == "https://download.test/min.zip" + req = exports_api._low_level_client.export_data.call_args.kwargs["request"] + assert list(req.runs_and_time_range.run_ids) == 
["run-1"] + assert not req.runs_and_time_range.HasField("start_time") + assert not req.runs_and_time_range.HasField("stop_time") + assert list(req.channel_ids) == [] + assert req.output_format == ExportOutputFormatProto.EXPORT_OUTPUT_FORMAT_SUN + + @pytest.mark.asyncio + async def test_with_calculated_channels(self, exports_api, sample_calc_channels): + """Test that calculated channel configs are included in the request.""" + exports_api._low_level_client.export_data = AsyncMock( + return_value=ExportDataResponse(presigned_url="https://download.test/calc.zip") + ) + + await exports_api.export_by_run( + run_ids=["run-1"], + output_format=ExportOutputFormat.CSV, + calculated_channel_configs=sample_calc_channels, + ) + + req = exports_api._low_level_client.export_data.call_args.kwargs["request"] + assert len(req.calculated_channel_configs) == 2 + assert req.calculated_channel_configs[0].name == "speed_doubled" + + @pytest.mark.asyncio + async def test_async_job_path(self, exports_api, mock_client): + """Test that an empty presigned_url falls back to job polling and get_download_url.""" + exports_api._low_level_client.export_data = AsyncMock( + return_value=ExportDataResponse(job_id="job-123") + ) + + mock_job = MagicMock(spec=Job) + mock_job.job_status = JobStatus.FINISHED + mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=mock_job) + + exports_api._low_level_client.get_download_url = AsyncMock( + return_value="https://download.test/async.zip" + ) + + url = await exports_api.export_by_run( + run_ids=["run-1"], + output_format=ExportOutputFormat.CSV, + polling_interval_secs=1, + timeout_secs=10, + ) + + assert url == "https://download.test/async.zip" + mock_client.async_.jobs.wait_until_complete.assert_awaited_once_with( + job="job-123", polling_interval_secs=1, timeout_secs=10 + ) + exports_api._low_level_client.get_download_url.assert_awaited_once_with(job_id="job-123") + + @pytest.mark.asyncio + async def 
test_async_job_failed_raises_with_reason(self, exports_api, mock_client): + """Test that a failed job raises RuntimeError with the error message from status details.""" + exports_api._low_level_client.export_data = AsyncMock( + return_value=ExportDataResponse(job_id="job-fail") + ) + mock_job = MagicMock(spec=Job) + mock_job.job_status = JobStatus.FAILED + mock_job.job_status_details = DataExportStatusDetails(error_message="out of memory") + mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=mock_job) + + with pytest.raises(RuntimeError, match=r"failed.*out of memory"): + await exports_api.export_by_run(run_ids=["run-1"], output_format=ExportOutputFormat.CSV) + + @pytest.mark.asyncio + async def test_async_job_failed_raises_without_reason(self, exports_api, mock_client): + """Test that a failed job with no status details still raises RuntimeError.""" + exports_api._low_level_client.export_data = AsyncMock( + return_value=ExportDataResponse(job_id="job-fail") + ) + mock_job = MagicMock(spec=Job) + mock_job.job_status = JobStatus.FAILED + mock_job.job_status_details = None + mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=mock_job) + + with pytest.raises(RuntimeError, match="failed"): + await exports_api.export_by_run(run_ids=["run-1"], output_format=ExportOutputFormat.CSV) + + @pytest.mark.asyncio + async def test_async_job_cancelled_raises(self, exports_api, mock_client): + """Test that a cancelled job raises RuntimeError.""" + exports_api._low_level_client.export_data = AsyncMock( + return_value=ExportDataResponse(job_id="job-cancel") + ) + mock_job = MagicMock(spec=Job) + mock_job.job_status = JobStatus.CANCELLED + mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=mock_job) + + with pytest.raises(RuntimeError, match="cancelled"): + await exports_api.export_by_run(run_ids=["run-1"], output_format=ExportOutputFormat.CSV) + + @pytest.mark.asyncio + async def test_empty_run_ids_raises(self, exports_api): + 
"""Test that an empty run_ids list raises ValueError.""" + with pytest.raises(ValueError, match="run_ids"): + await exports_api.export_by_run(run_ids=[], output_format=ExportOutputFormat.CSV) + + @pytest.mark.asyncio + async def test_null_run_id_raises(self, exports_api): + """Test that a run_ids list containing an empty string raises ValueError.""" + with pytest.raises(ValueError, match="empty or null"): + await exports_api.export_by_run( + run_ids=["", "run-1"], output_format=ExportOutputFormat.CSV + ) + + @pytest.mark.asyncio + async def test_start_after_stop_raises(self, exports_api): + """Test that start_time >= stop_time raises ValueError.""" + with pytest.raises(ValueError, match="start_time"): + await exports_api.export_by_run( + run_ids=["run-1"], + output_format=ExportOutputFormat.CSV, + start_time=STOP, + stop_time=START, + ) + + @pytest.mark.asyncio + async def test_start_without_stop_raises(self, exports_api): + """Test that providing start_time without stop_time raises ValueError.""" + with pytest.raises(ValueError, match="both be provided or both omitted"): + await exports_api.export_by_run( + run_ids=["run-1"], + output_format=ExportOutputFormat.CSV, + start_time=START, + ) + + @pytest.mark.asyncio + async def test_stop_without_start_raises(self, exports_api): + """Test that providing stop_time without start_time raises ValueError.""" + with pytest.raises(ValueError, match="both be provided or both omitted"): + await exports_api.export_by_run( + run_ids=["run-1"], + output_format=ExportOutputFormat.CSV, + stop_time=STOP, + ) + + +class TestExportByAsset: + """Tests for the export_by_asset method.""" + + @pytest.mark.asyncio + async def test_builds_correct_request(self, exports_api): + """Test request construction with assets, time range, and channel IDs.""" + exports_api._low_level_client.export_data = AsyncMock( + return_value=ExportDataResponse(presigned_url="https://download.test/asset.zip") + ) + + url = await exports_api.export_by_asset( + 
asset_ids=["asset-1"], + start_time=START, + stop_time=STOP, + output_format=ExportOutputFormat.CSV, + channel_ids=["ch-1", "ch-2"], + ) + + assert url == "https://download.test/asset.zip" + req = exports_api._low_level_client.export_data.call_args.kwargs["request"] + assert list(req.assets_and_time_range.asset_ids) == ["asset-1"] + assert req.assets_and_time_range.HasField("start_time") + assert req.assets_and_time_range.HasField("stop_time") + assert list(req.channel_ids) == ["ch-1", "ch-2"] + assert req.output_format == ExportOutputFormatProto.EXPORT_OUTPUT_FORMAT_CSV + + @pytest.mark.asyncio + async def test_empty_asset_ids_raises(self, exports_api): + """Test that an empty asset_ids list raises ValueError.""" + with pytest.raises(ValueError, match="asset_ids"): + await exports_api.export_by_asset( + asset_ids=[], start_time=START, stop_time=STOP, output_format=ExportOutputFormat.CSV + ) + + @pytest.mark.asyncio + async def test_null_asset_id_raises(self, exports_api): + """Test that an asset_ids list containing an empty string raises ValueError.""" + with pytest.raises(ValueError, match="empty or null"): + await exports_api.export_by_asset( + asset_ids=[""], + start_time=START, + stop_time=STOP, + output_format=ExportOutputFormat.CSV, + ) + + @pytest.mark.asyncio + async def test_start_after_stop_raises(self, exports_api): + """Test that start_time >= stop_time raises ValueError.""" + with pytest.raises(ValueError, match="start_time"): + await exports_api.export_by_asset( + asset_ids=["asset-1"], + start_time=STOP, + stop_time=START, + output_format=ExportOutputFormat.CSV, + ) + + +class TestExportByTimeRange: + """Tests for the export_by_time_range method.""" + + @pytest.mark.asyncio + async def test_builds_correct_request_with_channel_ids(self, exports_api): + """Test request construction with time range and channel IDs.""" + exports_api._low_level_client.export_data = AsyncMock( + 
return_value=ExportDataResponse(presigned_url="https://download.test/time.zip") + ) + + url = await exports_api.export_by_time_range( + start_time=START, + stop_time=STOP, + output_format=ExportOutputFormat.SUN, + channel_ids=["ch-1"], + ) + + assert url == "https://download.test/time.zip" + req = exports_api._low_level_client.export_data.call_args.kwargs["request"] + assert req.time_range.HasField("start_time") + assert req.time_range.HasField("stop_time") + assert list(req.channel_ids) == ["ch-1"] + assert req.output_format == ExportOutputFormatProto.EXPORT_OUTPUT_FORMAT_SUN + + @pytest.mark.asyncio + async def test_builds_correct_request_with_calc_channels( + self, exports_api, sample_calc_channels + ): + """Test request construction with calculated channels instead of channel IDs.""" + exports_api._low_level_client.export_data = AsyncMock( + return_value=ExportDataResponse(presigned_url="https://download.test/calc.zip") + ) + + await exports_api.export_by_time_range( + start_time=START, + stop_time=STOP, + output_format=ExportOutputFormat.CSV, + calculated_channel_configs=sample_calc_channels, + ) + + req = exports_api._low_level_client.export_data.call_args.kwargs["request"] + assert len(req.calculated_channel_configs) == 2 + assert list(req.channel_ids) == [] + + @pytest.mark.asyncio + async def test_no_channels_raises(self, exports_api): + """Test that omitting both channel_ids and calculated_channel_configs raises ValueError.""" + with pytest.raises(ValueError, match=r"channel_ids.*calculated_channel_configs"): + await exports_api.export_by_time_range( + start_time=START, stop_time=STOP, output_format=ExportOutputFormat.CSV + ) + + @pytest.mark.asyncio + async def test_start_after_stop_raises(self, exports_api): + """Test that start_time >= stop_time raises ValueError.""" + with pytest.raises(ValueError, match="start_time"): + await exports_api.export_by_time_range( + start_time=STOP, + stop_time=START, + output_format=ExportOutputFormat.CSV, + 
channel_ids=["ch-1"], + ) diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index d1a190619..fb0d87979 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -136,6 +136,8 @@ async def export_by_run( raise ValueError("'run_ids' must be a non-empty list of run IDs.") if any(not run_id for run_id in run_ids): raise ValueError("'run_ids' must not contain empty or null values.") + if (start_time is None) != (stop_time is None): + raise ValueError("'start_time' and 'stop_time' must both be provided or both omitted.") if start_time and stop_time and start_time >= stop_time: raise ValueError("'start_time' must be before 'stop_time'.") @@ -342,13 +344,19 @@ async def _await_download_url( self, job_id: str, polling_interval_secs: int = 5, timeout_secs: int | None = None ) -> str: """Poll a background export job until complete, then return the download URL.""" - from sift_client.sift_types.job import JobStatus + from sift_client.sift_types.job import DataExportStatusDetails, JobStatus job = await self.client.async_.jobs.wait_until_complete( job=job_id, polling_interval_secs=polling_interval_secs, timeout_secs=timeout_secs ) if job.job_status == JobStatus.FAILED: - raise RuntimeError(f"Export job '{job_id}' failed.") + reason = "" + if ( + isinstance(job.job_status_details, DataExportStatusDetails) + and job.job_status_details.error_message + ): + reason = f": {job.job_status_details.error_message}" + raise RuntimeError(f"Export job '{job_id}' failed{reason}") if job.job_status == JobStatus.CANCELLED: raise RuntimeError(f"Export job '{job_id}' was cancelled.") return await self._low_level_client.get_download_url(job_id=job_id) From db372c6420d749cfa21ad91c985f0764cc8017db Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Thu, 12 Mar 2026 11:53:08 -0700 Subject: [PATCH 08/53] python(fix): updated docstring args to match export options in UI --- 
python/lib/sift_client/resources/exports.py | 36 ++++++++++----------- python/lib/sift_client/sift_types/export.py | 2 +- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index fb0d87979..dc6fb0ba2 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -113,16 +113,16 @@ async def export_by_run( Args: run_ids: One or more run IDs to export data from. - output_format: The file format for the export (CSV or SUN). + output_format: The file format for the export (CSV or Sun/WinPlot). start_time: Optional start time to narrow the export within the run(s). stop_time: Optional stop time to narrow the export within the run(s). channel_ids: Optional list of channel IDs to include. If omitted, all channels are exported. calculated_channel_configs: Optional inline calculated channels to include in the export. - use_legacy_format: Use legacy key-value metadata format for channel headers. - simplify_channel_names: Remove the component part of channel names if unique in the export. - combine_runs: Combine channels from the same asset across different runs into a single column. - split_export_by_asset: Split each asset into its own export file. - split_export_by_run: Split each run into its own export file. + use_legacy_format: Use legacy channel name display format: ``channel.name (assetName=... runName=... runId=...)``. + simplify_channel_names: Remove text preceding last period in channel names, only if the resulting simplified name is unique. + combine_runs: Identical channels within the same asset across multiple runs will be combined into a single column. + split_export_by_asset: Split each asset into a separate file, with asset name removed from channel name display. + split_export_by_run: Split each run into a separate file, with run name removed from channel name display. 
polling_interval_secs: Seconds between status polls for async exports. Defaults to 5. timeout_secs: Maximum seconds to wait for async exports. None means wait indefinitely. @@ -201,14 +201,14 @@ async def export_by_asset( asset_ids: One or more asset IDs to export data from. start_time: Start of the time range to export. stop_time: End of the time range to export. - output_format: The file format for the export (CSV or SUN). + output_format: The file format for the export (CSV, Parquet, or Sun/WinPlot). channel_ids: Optional list of channel IDs to include. If omitted, all channels are exported. calculated_channel_configs: Optional inline calculated channels to include in the export. - use_legacy_format: Use legacy key-value metadata format for channel headers. - simplify_channel_names: Remove the component part of channel names if unique in the export. - combine_runs: Combine channels from the same asset across different runs into a single column. - split_export_by_asset: Split each asset into its own export file. - split_export_by_run: Split each run into its own export file. + use_legacy_format: Use legacy channel name display format: ``channel.name (assetName=... runName=... runId=...)``. + simplify_channel_names: Remove text preceding last period in channel names, only if the resulting simplified name is unique. + combine_runs: Identical channels within the same asset across multiple runs will be combined into a single column. + split_export_by_asset: Split each asset into a separate file, with asset name removed from channel name display. + split_export_by_run: Split each run into a separate file, with run name removed from channel name display. polling_interval_secs: Seconds between status polls for async exports. Defaults to 5. timeout_secs: Maximum seconds to wait for async exports. None means wait indefinitely. @@ -282,14 +282,14 @@ async def export_by_time_range( Args: start_time: Start of the time range to export. 
stop_time: End of the time range to export. - output_format: The file format for the export (CSV or SUN). + output_format: The file format for the export (CSV, Parquet, or Sun/WinPlot). channel_ids: List of channel IDs to include in the export. calculated_channel_configs: Inline calculated channels to include in the export. - use_legacy_format: Use legacy key-value metadata format for channel headers. - simplify_channel_names: Remove the component part of channel names if unique in the export. - combine_runs: Combine channels from the same asset across different runs into a single column. - split_export_by_asset: Split each asset into its own export file. - split_export_by_run: Split each run into its own export file. + use_legacy_format: Use legacy channel name display format: ``channel.name (assetName=... runName=... runId=...)``. + simplify_channel_names: Remove text preceding last period in channel names, only if the resulting simplified name is unique. + combine_runs: Identical channels within the same asset across multiple runs will be combined into a single column. + split_export_by_asset: Split each asset into a separate file, with asset name removed from channel name display. + split_export_by_run: Split each run into a separate file, with run name removed from channel name display. polling_interval_secs: Seconds between status polls for async exports. Defaults to 5. timeout_secs: Maximum seconds to wait for async exports. None means wait indefinitely. diff --git a/python/lib/sift_client/sift_types/export.py b/python/lib/sift_client/sift_types/export.py index 22273485a..b88749610 100644 --- a/python/lib/sift_client/sift_types/export.py +++ b/python/lib/sift_client/sift_types/export.py @@ -11,7 +11,7 @@ class ExportOutputFormat(Enum): Attributes: CSV: Comma-separated values format. - SUN: winplot format. + SUN: Sun (WinPlot) format. 
""" CSV = ExportOutputFormatProto.EXPORT_OUTPUT_FORMAT_CSV From 891fe87d8bf23ce4d426ebd45ff4b4a22f10fe6c Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Thu, 12 Mar 2026 11:57:23 -0700 Subject: [PATCH 09/53] python(fix): added assert for calc configs result --- python/lib/sift_client/_tests/resources/test_exports.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index 5318c08c9..399bb4044 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -89,6 +89,7 @@ def test_returns_none_for_empty_list(self): def test_converts_to_proto(self, sample_calc_channels): """Test converting Pydantic models to proto CalculatedChannelConfig messages.""" result = _build_calc_configs(sample_calc_channels) + assert result is not None assert len(result) == 2 first = result[0] From 37810c27eda4c1e52c3c81554e3a2cdcec963e9c Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Thu, 12 Mar 2026 12:13:45 -0700 Subject: [PATCH 10/53] python(fix): updated sync stubs --- .../resources/sync_stubs/__init__.pyi | 42 ++++++++++--------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi index 9b2bc1628..222b36aea 100644 --- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi +++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi @@ -27,7 +27,11 @@ if TYPE_CHECKING: FileAttachmentUpdate, RemoteFileEntityType, ) - from sift_client.sift_types.job import Job, JobStatus, JobType + from sift_client.sift_types.job import ( + Job, + JobStatus, + JobType, + ) from sift_client.sift_types.report import Report, ReportUpdate from sift_client.sift_types.rule import Rule, RuleCreate, RuleUpdate, RuleVersion from sift_client.sift_types.run import Run, RunCreate, RunUpdate @@ -602,14 +606,14 
@@ class ExportsAPI: asset_ids: One or more asset IDs to export data from. start_time: Start of the time range to export. stop_time: End of the time range to export. - output_format: The file format for the export (CSV or SUN). + output_format: The file format for the export (CSV, Parquet, or Sun/WinPlot). channel_ids: Optional list of channel IDs to include. If omitted, all channels are exported. calculated_channel_configs: Optional inline calculated channels to include in the export. - use_legacy_format: Use legacy key-value metadata format for channel headers. - simplify_channel_names: Remove the component part of channel names if unique in the export. - combine_runs: Combine channels from the same asset across different runs into a single column. - split_export_by_asset: Split each asset into its own export file. - split_export_by_run: Split each run into its own export file. + use_legacy_format: Use legacy channel name display format: ``channel.name (assetName=... runName=... runId=...)``. + simplify_channel_names: Remove text preceding last period in channel names, only if the resulting simplified name is unique. + combine_runs: Identical channels within the same asset across multiple runs will be combined into a single column. + split_export_by_asset: Split each asset into a separate file, with asset name removed from channel name display. + split_export_by_run: Split each run into a separate file, with run name removed from channel name display. polling_interval_secs: Seconds between status polls for async exports. Defaults to 5. timeout_secs: Maximum seconds to wait for async exports. None means wait indefinitely. @@ -646,16 +650,16 @@ class ExportsAPI: Args: run_ids: One or more run IDs to export data from. - output_format: The file format for the export (CSV or SUN). + output_format: The file format for the export (CSV or Sun/WinPlot). start_time: Optional start time to narrow the export within the run(s). 
stop_time: Optional stop time to narrow the export within the run(s). channel_ids: Optional list of channel IDs to include. If omitted, all channels are exported. calculated_channel_configs: Optional inline calculated channels to include in the export. - use_legacy_format: Use legacy key-value metadata format for channel headers. - simplify_channel_names: Remove the component part of channel names if unique in the export. - combine_runs: Combine channels from the same asset across different runs into a single column. - split_export_by_asset: Split each asset into its own export file. - split_export_by_run: Split each run into its own export file. + use_legacy_format: Use legacy channel name display format: ``channel.name (assetName=... runName=... runId=...)``. + simplify_channel_names: Remove text preceding last period in channel names, only if the resulting simplified name is unique. + combine_runs: Identical channels within the same asset across multiple runs will be combined into a single column. + split_export_by_asset: Split each asset into a separate file, with asset name removed from channel name display. + split_export_by_run: Split each run into a separate file, with run name removed from channel name display. polling_interval_secs: Seconds between status polls for async exports. Defaults to 5. timeout_secs: Maximum seconds to wait for async exports. None means wait indefinitely. @@ -692,14 +696,14 @@ class ExportsAPI: Args: start_time: Start of the time range to export. stop_time: End of the time range to export. - output_format: The file format for the export (CSV or SUN). + output_format: The file format for the export (CSV, Parquet, or Sun/WinPlot). channel_ids: List of channel IDs to include in the export. calculated_channel_configs: Inline calculated channels to include in the export. - use_legacy_format: Use legacy key-value metadata format for channel headers. 
- simplify_channel_names: Remove the component part of channel names if unique in the export. - combine_runs: Combine channels from the same asset across different runs into a single column. - split_export_by_asset: Split each asset into its own export file. - split_export_by_run: Split each run into its own export file. + use_legacy_format: Use legacy channel name display format: ``channel.name (assetName=... runName=... runId=...)``. + simplify_channel_names: Remove text preceding last period in channel names, only if the resulting simplified name is unique. + combine_runs: Identical channels within the same asset across multiple runs will be combined into a single column. + split_export_by_asset: Split each asset into a separate file, with asset name removed from channel name display. + split_export_by_run: Split each run into a separate file, with run name removed from channel name display. polling_interval_secs: Seconds between status polls for async exports. Defaults to 5. timeout_secs: Maximum seconds to wait for async exports. None means wait indefinitely. 
From 9ae44c6ac9d2b496a5dc4332dd093ad2b705ccad Mon Sep 17 00:00:00 2001
From: Wei Qi Lu
Date: Sun, 15 Mar 2026 14:01:34 -0700
Subject: [PATCH 11/53] python(refactor): exports API accepts domain objects alongside raw IDs

---
 python/lib/sift_client/resources/exports.py | 149 +++++++++++---------
 python/lib/sift_client/sift_types/export.py |  51 +------
 2 files changed, 82 insertions(+), 118 deletions(-)

diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py
index dc6fb0ba2..90fd5d748 100644
--- a/python/lib/sift_client/resources/exports.py
+++ b/python/lib/sift_client/resources/exports.py
@@ -17,7 +17,11 @@
 from sift_client._internal.low_level_wrappers.exports import ExportsLowLevelClient
 from sift_client._internal.util.timestamp import to_pb_timestamp
 from sift_client.resources._base import ResourceBase
-from sift_client.sift_types.export import ExportCalculatedChannel, ExportOutputFormat  # noqa: TC001
+from sift_client.sift_types.export import ExportOutputFormat  # noqa: TC001
+from sift_client.sift_types.run import Run
+from sift_client.sift_types.asset import Asset
+from sift_client.sift_types.channel import Channel
+from sift_client.sift_types.calculated_channel import CalculatedChannel, CalculatedChannelCreate
 
 if TYPE_CHECKING:
     from datetime import datetime
@@ -25,27 +29,32 @@
     from sift_client.client import SiftClient
 
 
-def _build_calc_configs(
-    calculated_channel_configs: list[ExportCalculatedChannel] | None,
-) -> list[CalculatedChannelConfig] | None:
-    """Convert CalculatedChannel Pydantic models to proto CalculatedChannelConfig messages."""
-    if not calculated_channel_configs:
-        return None
-    return [
-        CalculatedChannelConfig(
-            name=cc.name,
-            expression=cc.expression,
-            channel_references=[
-                CalculatedChannelAbstractChannelReference(
-                    channel_reference=ref.channel_reference,
-                    channel_identifier=ref.channel_identifier,
-                )
-                for ref in cc.channel_references
-            ],
-            units=cc.units,
+def _build_calc_channels(
calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None, +) -> list[CalculatedChannelConfig]: + if not calculated_channels: + return [] + configs = [] + for cc in calculated_channels: + if isinstance(cc, CalculatedChannelCreate): + refs = cc.expression_channel_references or [] + else: + refs = cc.channel_references + configs.append( + CalculatedChannelConfig( + name=cc.name, + expression=cc.expression, + channel_references=[ + CalculatedChannelAbstractChannelReference( + channel_reference=ref.channel_reference, + channel_identifier=ref.channel_identifier, + ) + for ref in refs + ], + units=cc.units, + ) ) - for cc in calculated_channel_configs - ] + return configs class ExportsAPIAsync(ResourceBase): @@ -91,12 +100,12 @@ def __init__(self, sift_client: SiftClient): async def export_by_run( self, *, - run_ids: list[str], + runs: list[str | Run], output_format: ExportOutputFormat, start_time: datetime | None = None, stop_time: datetime | None = None, - channel_ids: list[str] | None = None, - calculated_channel_configs: list[ExportCalculatedChannel] | None = None, + channels: list[str | Channel] | None = None, + calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, use_legacy_format: bool = False, simplify_channel_names: bool = False, combine_runs: bool = False, @@ -132,23 +141,23 @@ async def export_by_run( Raises: TimeoutError: If the export job does not complete within timeout_secs. 
""" - if not run_ids: - raise ValueError("'run_ids' must be a non-empty list of run IDs.") - if any(not run_id for run_id in run_ids): - raise ValueError("'run_ids' must not contain empty or null values.") + if not runs: + raise ValueError("'runs' must be a non-empty list of run objects or run ids.") + if any(not run for run in runs): + raise ValueError("'runs' must not contain empty or null values.") if (start_time is None) != (stop_time is None): raise ValueError("'start_time' and 'stop_time' must both be provided or both omitted.") if start_time and stop_time and start_time >= stop_time: raise ValueError("'start_time' must be before 'stop_time'.") + run_ids = [r._id_or_error if isinstance(r, Run) else r for r in runs] + runs_and_time_range = RunsAndTimeRange(run_ids=run_ids) if start_time: runs_and_time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) if stop_time: runs_and_time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) - calc_configs = _build_calc_configs(calculated_channel_configs) - export_options = ExportOptions( use_legacy_format=use_legacy_format, simplify_channel_names=simplify_channel_names, @@ -157,18 +166,20 @@ async def export_by_run( split_export_by_run=split_export_by_run, ) + channel_ids = ( + [c._id_or_error if isinstance(c, Channel) else c for c in channels] if channels else [] + ) + request = ExportDataRequest( runs_and_time_range=runs_and_time_range, output_format=output_format.value, export_options=export_options, - channel_ids=channel_ids or [], - calculated_channel_configs=calc_configs or [], + channel_ids=channel_ids, + calculated_channel_configs=_build_calc_channels(calculated_channels), ) response = await self._low_level_client.export_data(request=request) - if response.presigned_url: - return response.presigned_url return await self._await_download_url( job_id=response.job_id, polling_interval_secs=polling_interval_secs, @@ -178,12 +189,12 @@ async def export_by_run( async def export_by_asset( self, *, - asset_ids: 
list[str], + assets: list[str | Asset], start_time: datetime, stop_time: datetime, output_format: ExportOutputFormat, - channel_ids: list[str] | None = None, - calculated_channel_configs: list[ExportCalculatedChannel] | None = None, + channels: list[str | Channel] | None = None, + calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, use_legacy_format: bool = False, simplify_channel_names: bool = False, combine_runs: bool = False, @@ -194,16 +205,16 @@ async def export_by_asset( ) -> str: """Export data scoped by one or more assets within a time range. - Both start_time and stop_time are required. If no channel_ids or - calculated_channel_configs are provided, all channels from the assets are included. + Both start_time and stop_time are required. If no channels or + calculated_channels are provided, all channels from the assets are included. Args: - asset_ids: One or more asset IDs to export data from. + assets: One or more Asset objects or asset IDs to export data from. start_time: Start of the time range to export. stop_time: End of the time range to export. output_format: The file format for the export (CSV, Parquet, or Sun/WinPlot). - channel_ids: Optional list of channel IDs to include. If omitted, all channels are exported. - calculated_channel_configs: Optional inline calculated channels to include in the export. + channels: Optional list of Channel objects or channel IDs to include. If omitted, all channels are exported. + calculated_channels: Optional calculated channels to include in the export. Accepts existing CalculatedChannel objects or CalculatedChannelCreate definitions. use_legacy_format: Use legacy channel name display format: ``channel.name (assetName=... runName=... runId=...)``. simplify_channel_names: Remove text preceding last period in channel names, only if the resulting simplified name is unique. 
combine_runs: Identical channels within the same asset across multiple runs will be combined into a single column. @@ -218,19 +229,19 @@ async def export_by_asset( Raises: TimeoutError: If the export job does not complete within timeout_secs. """ - if not asset_ids: - raise ValueError("'asset_ids' must be a non-empty list of asset IDs.") - if any(not asset_id for asset_id in asset_ids): - raise ValueError("'asset_ids' must not contain empty or null values.") + if not assets: + raise ValueError("'assets' must be a non-empty list of asset objects or asset IDs.") + if any(not asset for asset in assets): + raise ValueError("'assets' must not contain empty or null values.") if start_time >= stop_time: raise ValueError("'start_time' must be before 'stop_time'.") + asset_ids = [a._id_or_error if isinstance(a, Asset) else a for a in assets] + assets_and_time_range = AssetsAndTimeRange(asset_ids=asset_ids) assets_and_time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) assets_and_time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) - calc_configs = _build_calc_configs(calculated_channel_configs) - export_options = ExportOptions( use_legacy_format=use_legacy_format, simplify_channel_names=simplify_channel_names, @@ -239,18 +250,20 @@ async def export_by_asset( split_export_by_run=split_export_by_run, ) + channel_ids = ( + [c._id_or_error if isinstance(c, Channel) else c for c in channels] if channels else [] + ) + request = ExportDataRequest( assets_and_time_range=assets_and_time_range, - channel_ids=channel_ids or [], - calculated_channel_configs=calc_configs or [], + channel_ids=channel_ids, + calculated_channel_configs=_build_calc_channels(calculated_channels), output_format=output_format.value, export_options=export_options, ) response = await self._low_level_client.export_data(request=request) - if response.presigned_url: - return response.presigned_url return await self._await_download_url( job_id=response.job_id, 
polling_interval_secs=polling_interval_secs, @@ -263,8 +276,8 @@ async def export_by_time_range( start_time: datetime, stop_time: datetime, output_format: ExportOutputFormat, - channel_ids: list[str] | None = None, - calculated_channel_configs: list[ExportCalculatedChannel] | None = None, + channels: list[str | Channel] | None = None, + calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, use_legacy_format: bool = False, simplify_channel_names: bool = False, combine_runs: bool = False, @@ -275,16 +288,16 @@ async def export_by_time_range( ) -> str: """Export data within a time range. - Both start_time and stop_time are required. At least one of channel_ids or - calculated_channel_configs **must** be provided to scope the data, since there + Both start_time and stop_time are required. At least one of channels or + calculated_channels **must** be provided to scope the data, since there are no runs or assets to infer channels from. Args: start_time: Start of the time range to export. stop_time: End of the time range to export. output_format: The file format for the export (CSV, Parquet, or Sun/WinPlot). - channel_ids: List of channel IDs to include in the export. - calculated_channel_configs: Inline calculated channels to include in the export. + channels: List of Channel objects or channel IDs to include in the export. + calculated_channels: Calculated channels to include in the export. Accepts existing CalculatedChannel objects or CalculatedChannelCreate definitions. use_legacy_format: Use legacy channel name display format: ``channel.name (assetName=... runName=... runId=...)``. simplify_channel_names: Remove text preceding last period in channel names, only if the resulting simplified name is unique. combine_runs: Identical channels within the same asset across multiple runs will be combined into a single column. @@ -297,12 +310,12 @@ async def export_by_time_range( A presigned download URL for the exported zip file. 
 
         Raises:
-            ValueError: If neither channel_ids nor calculated_channel_configs is provided.
+            ValueError: If neither channels nor calculated_channels is provided.
             TimeoutError: If the export job does not complete within timeout_secs.
         """
-        if not channel_ids and not calculated_channel_configs:
+        if not channels and not calculated_channels:
             raise ValueError(
-                "At least one of 'channel_ids' or 'calculated_channel_configs' must be provided "
+                "At least one of 'channels' or 'calculated_channels' must be provided "
                 "when exporting by time range."
             )
         if start_time >= stop_time:
@@ -312,8 +325,6 @@ async def export_by_time_range(
         time_range.start_time.CopyFrom(to_pb_timestamp(start_time))
         time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time))
 
-        calc_configs = _build_calc_configs(calculated_channel_configs)
-
         export_options = ExportOptions(
             use_legacy_format=use_legacy_format,
             simplify_channel_names=simplify_channel_names,
@@ -322,18 +333,20 @@ async def export_by_time_range(
             split_export_by_run=split_export_by_run,
         )
 
+        channel_ids = (
+            [c._id_or_error if isinstance(c, Channel) else c for c in channels] if channels else []
+        )
+
         request = ExportDataRequest(
             time_range=time_range,
-            channel_ids=channel_ids or [],
-            calculated_channel_configs=calc_configs or [],
+            channel_ids=channel_ids,
+            calculated_channel_configs=_build_calc_channels(calculated_channels),
             output_format=output_format.value,
             export_options=export_options,
         )
 
         response = await self._low_level_client.export_data(request=request)
-        if response.presigned_url:
-            return response.presigned_url
         return await self._await_download_url(
             job_id=response.job_id,
             polling_interval_secs=polling_interval_secs,
@@ -356,7 +369,7 @@ async def export_by_time_range(
             and job.job_status_details.error_message
         ):
             reason = f": {job.job_status_details.error_message}"
-        raise RuntimeError(f"Export job '{job_id}' failed{reason}")
+        raise RuntimeError(f"Export job '{job_id}' failed{reason}")
         if job.job_status == JobStatus.CANCELLED:
raise RuntimeError(f"Export job '{job_id}' was cancelled.") return await self._low_level_client.get_download_url(job_id=job_id) diff --git a/python/lib/sift_client/sift_types/export.py b/python/lib/sift_client/sift_types/export.py index b88749610..bac3eac31 100644 --- a/python/lib/sift_client/sift_types/export.py +++ b/python/lib/sift_client/sift_types/export.py @@ -2,7 +2,6 @@ from enum import Enum -from pydantic import BaseModel from sift.exports.v1.exports_pb2 import ExportOutputFormat as ExportOutputFormatProto @@ -11,56 +10,8 @@ class ExportOutputFormat(Enum): Attributes: CSV: Comma-separated values format. - SUN: Sun (WinPlot) format. + SUN: Sun (WinPlot) format (not used in certain environments). """ CSV = ExportOutputFormatProto.EXPORT_OUTPUT_FORMAT_CSV SUN = ExportOutputFormatProto.EXPORT_OUTPUT_FORMAT_SUN - - -class ChannelReference(BaseModel): - """Maps a placeholder variable in a calculated channel expression to an actual channel. - - Example:: - - ChannelReference( - channel_reference="$1", # must match the placeholder exactly as it appears in the expression - channel_identifier="cbddaf97-3332-4666-80f2-a19be6a77eef", # channel UUID - ) - - Attributes: - channel_reference: The placeholder as it appears in the expression, i.e. $1, $2, etc. - channel_identifier: The channel UUID. - """ - - channel_reference: str - channel_identifier: str - - -class ExportCalculatedChannel(BaseModel): - """An inline calculated channel to include in an export. - - Defines a formula-based channel that is computed at export time from existing channels. - - Example:: - - ExportCalculatedChannel( - name="speed_doubled", - expression="$1 * 2", - channel_references=[ - ChannelReference(channel_reference="$1", channel_identifier=""), - ], - units="m/s", - ) - - Attributes: - name: Display name for the calculated channel in the export. - expression: The formula to compute, using $placeholder syntax for channel references. 
- channel_references: Mappings from expression placeholders to actual channels. - units: Optional unit label for the calculated channel. - """ - - name: str - expression: str - channel_references: list[ChannelReference] - units: str | None = None From 67dc30c42e1d8a61a7f1a7f67c3e659e72a785ee Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Sun, 15 Mar 2026 15:31:26 -0700 Subject: [PATCH 12/53] python(refactor): exports API returns job, separate high-level/low-level concerns, update tests --- .../_internal/low_level_wrappers/exports.py | 240 +++++- .../_tests/resources/test_exports.py | 717 ++++++++++-------- python/lib/sift_client/resources/exports.py | 264 +++---- .../resources/sync_stubs/__init__.pyi | 122 +-- 4 files changed, 803 insertions(+), 540 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/exports.py b/python/lib/sift_client/_internal/low_level_wrappers/exports.py index 9c1c43970..789807e61 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/exports.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/exports.py @@ -2,21 +2,80 @@ from typing import TYPE_CHECKING, cast +from sift.calculated_channels.v2.calculated_channels_pb2 import ( + CalculatedChannelAbstractChannelReference, +) from sift.exports.v1.exports_pb2 import ( + AssetsAndTimeRange, + CalculatedChannelConfig, ExportDataRequest, ExportDataResponse, + ExportOptions, GetDownloadUrlRequest, GetDownloadUrlResponse, + RunsAndTimeRange, + TimeRange, ) from sift.exports.v1.exports_pb2_grpc import ExportServiceStub from sift_client._internal.low_level_wrappers.base import LowLevelClientBase +from sift_client._internal.util.timestamp import to_pb_timestamp +from sift_client.sift_types.calculated_channel import CalculatedChannel, CalculatedChannelCreate from sift_client.transport import WithGrpcClient if TYPE_CHECKING: + from datetime import datetime + from sift_client.transport.grpc_transport import GrpcClient +def _build_calc_channel_configs( + 
calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None, +) -> list[CalculatedChannelConfig]: + """Convert high-level calculated channel objects to proto CalculatedChannelConfig messages.""" + if not calculated_channels: + return [] + configs = [] + for cc in calculated_channels: + if isinstance(cc, CalculatedChannelCreate): + refs = cc.expression_channel_references or [] + else: + refs = cc.channel_references + configs.append( + CalculatedChannelConfig( + name=cc.name, + expression=cc.expression, + channel_references=[ + CalculatedChannelAbstractChannelReference( + channel_reference=ref.channel_reference, + channel_identifier=ref.channel_identifier, + ) + for ref in refs + ], + units=cc.units, + ) + ) + return configs + + +def _build_export_options( + *, + use_legacy_format: bool = False, + simplify_channel_names: bool = False, + combine_runs: bool = False, + split_export_by_asset: bool = False, + split_export_by_run: bool = False, +) -> ExportOptions: + """Build an ExportOptions proto from primitive flags.""" + return ExportOptions( + use_legacy_format=use_legacy_format, + simplify_channel_names=simplify_channel_names, + combine_runs=combine_runs, + split_export_by_asset=split_export_by_asset, + split_export_by_run=split_export_by_run, + ) + + class ExportsLowLevelClient(LowLevelClientBase, WithGrpcClient): """Low-level client for the ExportsAPI. @@ -31,30 +90,173 @@ def __init__(self, grpc_client: GrpcClient): """ super().__init__(grpc_client) - async def export_data(self, request: ExportDataRequest) -> ExportDataResponse: - """Initiate a data export. 
+ async def export_by_run( + self, + *, + run_ids: list[str], + output_format: int, + start_time: datetime | None = None, + stop_time: datetime | None = None, + channel_ids: list[str] | None = None, + calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, + use_legacy_format: bool = False, + simplify_channel_names: bool = False, + combine_runs: bool = False, + split_export_by_asset: bool = False, + split_export_by_run: bool = False, + ) -> str: + """Initiate a data export scoped by runs. + + Args: + run_ids: List of run IDs to export. + output_format: The proto enum value for the export format. + start_time: Optional start time to narrow the export. + stop_time: Optional stop time to narrow the export. + channel_ids: Optional list of channel IDs to include. + calculated_channels: Optional calculated channel objects to include. + use_legacy_format: Use legacy channel name display format. + simplify_channel_names: Simplify channel names if unique. + combine_runs: Combine identical channels across runs. + split_export_by_asset: Split export by asset. + split_export_by_run: Split export by run. + + Returns: + The job ID for the background export. 
+ """ + runs_and_time_range = RunsAndTimeRange(run_ids=run_ids) + if start_time: + runs_and_time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) + if stop_time: + runs_and_time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) + + request = ExportDataRequest( + runs_and_time_range=runs_and_time_range, + output_format=output_format, + export_options=_build_export_options( + use_legacy_format=use_legacy_format, + simplify_channel_names=simplify_channel_names, + combine_runs=combine_runs, + split_export_by_asset=split_export_by_asset, + split_export_by_run=split_export_by_run, + ), + channel_ids=channel_ids or [], + calculated_channel_configs=_build_calc_channel_configs(calculated_channels), + ) - Returns a presigned_url if the export completes immediately, or a job_id - if it's processed in the background. Use get_download_url() to retrieve - the URL for background jobs. + response = await self._export_data(request) + return response.job_id + + async def export_by_asset( + self, + *, + asset_ids: list[str], + start_time: datetime, + stop_time: datetime, + output_format: int, + channel_ids: list[str] | None = None, + calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, + use_legacy_format: bool = False, + simplify_channel_names: bool = False, + combine_runs: bool = False, + split_export_by_asset: bool = False, + split_export_by_run: bool = False, + ) -> str: + """Initiate a data export scoped by assets and a time range. Args: - request: The ExportDataRequest proto message. + asset_ids: List of asset IDs to export. + start_time: Start of the time range. + stop_time: End of the time range. + output_format: The proto enum value for the export format. + channel_ids: Optional list of channel IDs to include. + calculated_channels: Optional calculated channel objects to include. + use_legacy_format: Use legacy channel name display format. + simplify_channel_names: Simplify channel names if unique. 
+ combine_runs: Combine identical channels across runs. + split_export_by_asset: Split export by asset. + split_export_by_run: Split export by run. Returns: - The ExportDataResponse containing either a presigned_url or a job_id. + The job ID for the background export. """ - response = await self._grpc_client.get_stub(ExportServiceStub).ExportData(request) - return cast("ExportDataResponse", response) + assets_and_time_range = AssetsAndTimeRange(asset_ids=asset_ids) + assets_and_time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) + assets_and_time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) + + request = ExportDataRequest( + assets_and_time_range=assets_and_time_range, + output_format=output_format, + export_options=_build_export_options( + use_legacy_format=use_legacy_format, + simplify_channel_names=simplify_channel_names, + combine_runs=combine_runs, + split_export_by_asset=split_export_by_asset, + split_export_by_run=split_export_by_run, + ), + channel_ids=channel_ids or [], + calculated_channel_configs=_build_calc_channel_configs(calculated_channels), + ) + + response = await self._export_data(request) + return response.job_id + + async def export_by_time_range( + self, + *, + start_time: datetime, + stop_time: datetime, + output_format: int, + channel_ids: list[str] | None = None, + calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, + use_legacy_format: bool = False, + simplify_channel_names: bool = False, + combine_runs: bool = False, + split_export_by_asset: bool = False, + split_export_by_run: bool = False, + ) -> str: + """Initiate a data export scoped by a time range. + + Args: + start_time: Start of the time range. + stop_time: End of the time range. + output_format: The proto enum value for the export format. + channel_ids: Optional list of channel IDs to include. + calculated_channels: Optional calculated channel objects to include. + use_legacy_format: Use legacy channel name display format. 
+ simplify_channel_names: Simplify channel names if unique. + combine_runs: Combine identical channels across runs. + split_export_by_asset: Split export by asset. + split_export_by_run: Split export by run. + + Returns: + The job ID for the background export. + """ + time_range = TimeRange() + time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) + time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) + + request = ExportDataRequest( + time_range=time_range, + output_format=output_format, + export_options=_build_export_options( + use_legacy_format=use_legacy_format, + simplify_channel_names=simplify_channel_names, + combine_runs=combine_runs, + split_export_by_asset=split_export_by_asset, + split_export_by_run=split_export_by_run, + ), + channel_ids=channel_ids or [], + calculated_channel_configs=_build_calc_channel_configs(calculated_channels), + ) + + response = await self._export_data(request) + return response.job_id async def get_download_url(self, job_id: str) -> str: """Get the download URL for a background export job. - If the job is still processing, the server will return an error. - Polling/retry logic should be handled. - Args: - job_id: The job ID returned from export_data(). + job_id: The job ID returned from an export method. Returns: The presigned URL to download the exported zip file. @@ -63,3 +265,15 @@ async def get_download_url(self, job_id: str) -> str: response = await self._grpc_client.get_stub(ExportServiceStub).GetDownloadUrl(request) response = cast("GetDownloadUrlResponse", response) return response.presigned_url + + async def _export_data(self, request: ExportDataRequest) -> ExportDataResponse: + """Make the ExportData gRPC call. + + Args: + request: The ExportDataRequest proto message. + + Returns: + The ExportDataResponse. 
+ """ + response = await self._grpc_client.get_stub(ExportServiceStub).ExportData(request) + return cast("ExportDataResponse", response) diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index 399bb4044..8b167ba64 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -1,9 +1,9 @@ """Pytest tests for the Exports API. These tests validate the usage of the ExportsAPIAsync including: -- Request construction for all three export methods (by run, asset, time range) -- Synchronous (presigned_url) and asynchronous (job polling) response handling -- Calculated channel config conversion to proto messages +- Correct delegation to the low-level client for all three export methods +- Domain object resolution (Run -> run_id, Asset -> asset_id, Channel -> channel_id) +- Job lifecycle: export methods return Job, wait_until_complete returns URL - Input validation and error handling """ @@ -13,20 +13,19 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest -from sift.exports.v1.exports_pb2 import ( - ExportDataResponse, -) -from sift.exports.v1.exports_pb2 import ( - ExportOutputFormat as ExportOutputFormatProto, -) - -from sift_client.resources.exports import ExportsAPIAsync, _build_calc_configs -from sift_client.sift_types.export import ( - ChannelReference, - ExportCalculatedChannel, - ExportOutputFormat, -) +from sift.exports.v1.exports_pb2 import ExportOutputFormat as ExportOutputFormatProto + +from sift_client._internal.low_level_wrappers.exports import _build_calc_channel_configs +from sift_client.resources.exports import ExportsAPIAsync +from sift_client.sift_types.asset import Asset +from sift_client.sift_types.calculated_channel import CalculatedChannelCreate, ChannelReference +from sift_client.sift_types.channel import Channel +from sift_client.sift_types.export import ExportOutputFormat from 
sift_client.sift_types.job import DataExportStatusDetails, Job, JobStatus +from sift_client.sift_types.run import Run + +START = datetime(2025, 1, 1, tzinfo=timezone.utc) +STOP = datetime(2025, 1, 2, tzinfo=timezone.utc) @pytest.fixture @@ -40,30 +39,45 @@ def mock_client(): @pytest.fixture -def exports_api(mock_client): +def mock_job(): + """Create a mock Job returned by jobs.get.""" + job = MagicMock(spec=Job) + job._id_or_error = "job-123" + job.job_status = JobStatus.FINISHED + return job + + +@pytest.fixture +def exports_api(mock_client, mock_job): """Create an ExportsAPIAsync with a mocked low-level client.""" with patch("sift_client.resources.exports.ExportsLowLevelClient", autospec=True) as mock_ll: api = ExportsAPIAsync(mock_client) api._low_level_client = mock_ll.return_value + # Default: low-level export methods return a job_id + api._low_level_client.export_by_run = AsyncMock(return_value="job-123") + api._low_level_client.export_by_asset = AsyncMock(return_value="job-123") + api._low_level_client.export_by_time_range = AsyncMock(return_value="job-123") + # Default: jobs.get returns a mock Job + mock_client.async_.jobs.get = AsyncMock(return_value=mock_job) return api @pytest.fixture def sample_calc_channels(): - """Create sample calculated channel configs for testing.""" + """Create sample calculated channel definitions for testing.""" return [ - ExportCalculatedChannel( + CalculatedChannelCreate( name="speed_doubled", expression="$1 * 2", - channel_references=[ + expression_channel_references=[ ChannelReference(channel_reference="$1", channel_identifier="ch-uuid-1"), ], units="m/s", ), - ExportCalculatedChannel( + CalculatedChannelCreate( name="no_units", expression="$1 + $2", - channel_references=[ + expression_channel_references=[ ChannelReference(channel_reference="$1", channel_identifier="ch-uuid-1"), ChannelReference(channel_reference="$2", channel_identifier="ch-uuid-2"), ], @@ -71,25 +85,20 @@ def sample_calc_channels(): ] -START = 
datetime(2025, 1, 1, tzinfo=timezone.utc) -STOP = datetime(2025, 1, 2, tzinfo=timezone.utc) - +class TestBuildCalcChannelConfigs: + """Tests for the _build_calc_channel_configs helper in the low-level client.""" -class TestBuildCalcConfigs: - """Tests for the _build_calc_configs helper.""" + def test_returns_empty_list_for_none(self): + """Test that None input returns an empty list.""" + assert _build_calc_channel_configs(None) == [] - def test_returns_none_for_none(self): - """Test that None input returns None.""" - assert _build_calc_configs(None) is None - - def test_returns_none_for_empty_list(self): - """Test that an empty list returns None.""" - assert _build_calc_configs([]) is None + def test_returns_empty_list_for_empty_list(self): + """Test that an empty list returns an empty list.""" + assert _build_calc_channel_configs([]) == [] def test_converts_to_proto(self, sample_calc_channels): - """Test converting Pydantic models to proto CalculatedChannelConfig messages.""" - result = _build_calc_configs(sample_calc_channels) - assert result is not None + """Test converting CalculatedChannelCreate objects to proto CalculatedChannelConfig messages.""" + result = _build_calc_channel_configs(sample_calc_channels) assert len(result) == 2 first = result[0] @@ -106,310 +115,394 @@ def test_converts_to_proto(self, sample_calc_channels): assert len(second.channel_references) == 2 -class TestExportByRun: - """Tests for the export_by_run method.""" - - @pytest.mark.asyncio - async def test_builds_correct_request_and_returns_presigned_url(self, exports_api): - """Test request construction with all parameters and synchronous presigned URL response.""" - exports_api._low_level_client.export_data = AsyncMock( - return_value=ExportDataResponse(presigned_url="https://download.test/run.zip") - ) - - url = await exports_api.export_by_run( - run_ids=["run-1", "run-2"], - output_format=ExportOutputFormat.CSV, - start_time=START, - stop_time=STOP, - channel_ids=["ch-1"], - 
use_legacy_format=True, - simplify_channel_names=True, - combine_runs=True, - split_export_by_asset=True, - split_export_by_run=True, - ) - - assert url == "https://download.test/run.zip" - req = exports_api._low_level_client.export_data.call_args.kwargs["request"] - assert list(req.runs_and_time_range.run_ids) == ["run-1", "run-2"] - assert req.runs_and_time_range.HasField("start_time") - assert req.runs_and_time_range.HasField("stop_time") - assert list(req.channel_ids) == ["ch-1"] - assert req.output_format == ExportOutputFormatProto.EXPORT_OUTPUT_FORMAT_CSV - assert req.export_options.use_legacy_format is True - assert req.export_options.simplify_channel_names is True - assert req.export_options.combine_runs is True - assert req.export_options.split_export_by_asset is True - assert req.export_options.split_export_by_run is True - - @pytest.mark.asyncio - async def test_minimal_args(self, exports_api): - """Test request construction with only required parameters.""" - exports_api._low_level_client.export_data = AsyncMock( - return_value=ExportDataResponse(presigned_url="https://download.test/min.zip") - ) - - url = await exports_api.export_by_run( - run_ids=["run-1"], - output_format=ExportOutputFormat.SUN, - ) - - assert url == "https://download.test/min.zip" - req = exports_api._low_level_client.export_data.call_args.kwargs["request"] - assert list(req.runs_and_time_range.run_ids) == ["run-1"] - assert not req.runs_and_time_range.HasField("start_time") - assert not req.runs_and_time_range.HasField("stop_time") - assert list(req.channel_ids) == [] - assert req.output_format == ExportOutputFormatProto.EXPORT_OUTPUT_FORMAT_SUN - - @pytest.mark.asyncio - async def test_with_calculated_channels(self, exports_api, sample_calc_channels): - """Test that calculated channel configs are included in the request.""" - exports_api._low_level_client.export_data = AsyncMock( - return_value=ExportDataResponse(presigned_url="https://download.test/calc.zip") - ) - - await 
exports_api.export_by_run( - run_ids=["run-1"], - output_format=ExportOutputFormat.CSV, - calculated_channel_configs=sample_calc_channels, - ) - - req = exports_api._low_level_client.export_data.call_args.kwargs["request"] - assert len(req.calculated_channel_configs) == 2 - assert req.calculated_channel_configs[0].name == "speed_doubled" - - @pytest.mark.asyncio - async def test_async_job_path(self, exports_api, mock_client): - """Test that an empty presigned_url falls back to job polling and get_download_url.""" - exports_api._low_level_client.export_data = AsyncMock( - return_value=ExportDataResponse(job_id="job-123") - ) - - mock_job = MagicMock(spec=Job) - mock_job.job_status = JobStatus.FINISHED - mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=mock_job) - - exports_api._low_level_client.get_download_url = AsyncMock( - return_value="https://download.test/async.zip" - ) - - url = await exports_api.export_by_run( - run_ids=["run-1"], - output_format=ExportOutputFormat.CSV, - polling_interval_secs=1, - timeout_secs=10, - ) - - assert url == "https://download.test/async.zip" - mock_client.async_.jobs.wait_until_complete.assert_awaited_once_with( - job="job-123", polling_interval_secs=1, timeout_secs=10 - ) - exports_api._low_level_client.get_download_url.assert_awaited_once_with(job_id="job-123") - - @pytest.mark.asyncio - async def test_async_job_failed_raises_with_reason(self, exports_api, mock_client): - """Test that a failed job raises RuntimeError with the error message from status details.""" - exports_api._low_level_client.export_data = AsyncMock( - return_value=ExportDataResponse(job_id="job-fail") - ) - mock_job = MagicMock(spec=Job) - mock_job.job_status = JobStatus.FAILED - mock_job.job_status_details = DataExportStatusDetails(error_message="out of memory") - mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=mock_job) - - with pytest.raises(RuntimeError, match=r"failed.*out of memory"): - await 
exports_api.export_by_run(run_ids=["run-1"], output_format=ExportOutputFormat.CSV) - - @pytest.mark.asyncio - async def test_async_job_failed_raises_without_reason(self, exports_api, mock_client): - """Test that a failed job with no status details still raises RuntimeError.""" - exports_api._low_level_client.export_data = AsyncMock( - return_value=ExportDataResponse(job_id="job-fail") - ) - mock_job = MagicMock(spec=Job) - mock_job.job_status = JobStatus.FAILED - mock_job.job_status_details = None - mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=mock_job) - - with pytest.raises(RuntimeError, match="failed"): - await exports_api.export_by_run(run_ids=["run-1"], output_format=ExportOutputFormat.CSV) - - @pytest.mark.asyncio - async def test_async_job_cancelled_raises(self, exports_api, mock_client): - """Test that a cancelled job raises RuntimeError.""" - exports_api._low_level_client.export_data = AsyncMock( - return_value=ExportDataResponse(job_id="job-cancel") - ) - mock_job = MagicMock(spec=Job) - mock_job.job_status = JobStatus.CANCELLED - mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=mock_job) - - with pytest.raises(RuntimeError, match="cancelled"): - await exports_api.export_by_run(run_ids=["run-1"], output_format=ExportOutputFormat.CSV) - - @pytest.mark.asyncio - async def test_empty_run_ids_raises(self, exports_api): - """Test that an empty run_ids list raises ValueError.""" - with pytest.raises(ValueError, match="run_ids"): - await exports_api.export_by_run(run_ids=[], output_format=ExportOutputFormat.CSV) - - @pytest.mark.asyncio - async def test_null_run_id_raises(self, exports_api): - """Test that a run_ids list containing an empty string raises ValueError.""" - with pytest.raises(ValueError, match="empty or null"): - await exports_api.export_by_run( - run_ids=["", "run-1"], output_format=ExportOutputFormat.CSV +class TestExportsAPIAsync: + """Tests for the ExportsAPIAsync high-level client.""" + + class 
TestExportByRun: + """Tests for the export_by_run method.""" + + @pytest.mark.asyncio + async def test_delegates_to_low_level_and_returns_job(self, exports_api): + """Test that export_by_run passes correct args to low-level and returns a Job.""" + job = await exports_api.export_by_run( + runs=["run-1", "run-2"], + output_format=ExportOutputFormat.CSV, + start_time=START, + stop_time=STOP, + channels=["ch-1"], + use_legacy_format=True, + simplify_channel_names=True, + combine_runs=True, + split_export_by_asset=True, + split_export_by_run=True, + ) + + assert isinstance(job, MagicMock) + exports_api._low_level_client.export_by_run.assert_awaited_once_with( + run_ids=["run-1", "run-2"], + output_format=ExportOutputFormatProto.EXPORT_OUTPUT_FORMAT_CSV, + start_time=START, + stop_time=STOP, + channel_ids=["ch-1"], + calculated_channels=None, + use_legacy_format=True, + simplify_channel_names=True, + combine_runs=True, + split_export_by_asset=True, + split_export_by_run=True, ) - @pytest.mark.asyncio - async def test_start_after_stop_raises(self, exports_api): - """Test that start_time >= stop_time raises ValueError.""" - with pytest.raises(ValueError, match="start_time"): + @pytest.mark.asyncio + async def test_minimal_args(self, exports_api): + """Test that minimal arguments are passed correctly with defaults.""" await exports_api.export_by_run( + runs=["run-1"], + output_format=ExportOutputFormat.SUN, + ) + + exports_api._low_level_client.export_by_run.assert_awaited_once_with( run_ids=["run-1"], - output_format=ExportOutputFormat.CSV, - start_time=STOP, - stop_time=START, + output_format=ExportOutputFormatProto.EXPORT_OUTPUT_FORMAT_SUN, + start_time=None, + stop_time=None, + channel_ids=[], + calculated_channels=None, + use_legacy_format=False, + simplify_channel_names=False, + combine_runs=False, + split_export_by_asset=False, + split_export_by_run=False, ) - @pytest.mark.asyncio - async def test_start_without_stop_raises(self, exports_api): - """Test that providing 
start_time without stop_time raises ValueError.""" - with pytest.raises(ValueError, match="both be provided or both omitted"): + @pytest.mark.asyncio + async def test_with_calculated_channels(self, exports_api, sample_calc_channels): + """Test that calculated channels are passed through to the low-level client.""" await exports_api.export_by_run( - run_ids=["run-1"], + runs=["run-1"], output_format=ExportOutputFormat.CSV, - start_time=START, + calculated_channels=sample_calc_channels, ) - @pytest.mark.asyncio - async def test_stop_without_start_raises(self, exports_api): - """Test that providing stop_time without start_time raises ValueError.""" - with pytest.raises(ValueError, match="both be provided or both omitted"): + call_kwargs = exports_api._low_level_client.export_by_run.call_args.kwargs + assert call_kwargs["calculated_channels"] == sample_calc_channels + + @pytest.mark.asyncio + async def test_resolves_run_objects_to_ids(self, exports_api): + """Test that Run domain objects are resolved to their IDs.""" + mock_run = MagicMock(spec=Run) + mock_run._id_or_error = "resolved-run-id" + await exports_api.export_by_run( - run_ids=["run-1"], + runs=[mock_run, "raw-id"], output_format=ExportOutputFormat.CSV, - stop_time=STOP, ) + call_kwargs = exports_api._low_level_client.export_by_run.call_args.kwargs + assert call_kwargs["run_ids"] == ["resolved-run-id", "raw-id"] -class TestExportByAsset: - """Tests for the export_by_asset method.""" - - @pytest.mark.asyncio - async def test_builds_correct_request(self, exports_api): - """Test request construction with assets, time range, and channel IDs.""" - exports_api._low_level_client.export_data = AsyncMock( - return_value=ExportDataResponse(presigned_url="https://download.test/asset.zip") - ) - - url = await exports_api.export_by_asset( - asset_ids=["asset-1"], - start_time=START, - stop_time=STOP, - output_format=ExportOutputFormat.CSV, - channel_ids=["ch-1", "ch-2"], - ) - - assert url == 
"https://download.test/asset.zip" - req = exports_api._low_level_client.export_data.call_args.kwargs["request"] - assert list(req.assets_and_time_range.asset_ids) == ["asset-1"] - assert req.assets_and_time_range.HasField("start_time") - assert req.assets_and_time_range.HasField("stop_time") - assert list(req.channel_ids) == ["ch-1", "ch-2"] - assert req.output_format == ExportOutputFormatProto.EXPORT_OUTPUT_FORMAT_CSV - - @pytest.mark.asyncio - async def test_empty_asset_ids_raises(self, exports_api): - """Test that an empty asset_ids list raises ValueError.""" - with pytest.raises(ValueError, match="asset_ids"): - await exports_api.export_by_asset( - asset_ids=[], start_time=START, stop_time=STOP, output_format=ExportOutputFormat.CSV + @pytest.mark.asyncio + async def test_resolves_channel_objects_to_ids(self, exports_api): + """Test that Channel domain objects are resolved to their IDs.""" + mock_channel = MagicMock(spec=Channel) + mock_channel._id_or_error = "resolved-ch-id" + + await exports_api.export_by_run( + runs=["run-1"], + output_format=ExportOutputFormat.CSV, + channels=[mock_channel, "raw-ch-id"], ) - @pytest.mark.asyncio - async def test_null_asset_id_raises(self, exports_api): - """Test that an asset_ids list containing an empty string raises ValueError.""" - with pytest.raises(ValueError, match="empty or null"): - await exports_api.export_by_asset( - asset_ids=[""], + call_kwargs = exports_api._low_level_client.export_by_run.call_args.kwargs + assert call_kwargs["channel_ids"] == ["resolved-ch-id", "raw-ch-id"] + + @pytest.mark.asyncio + async def test_empty_runs_raises(self, exports_api): + """Test that an empty runs list raises ValueError.""" + with pytest.raises(ValueError, match="runs"): + await exports_api.export_by_run(runs=[], output_format=ExportOutputFormat.CSV) + + @pytest.mark.asyncio + async def test_null_run_raises(self, exports_api): + """Test that a runs list containing an empty string raises ValueError.""" + with 
pytest.raises(ValueError, match="empty or null"): + await exports_api.export_by_run( + runs=["", "run-1"], output_format=ExportOutputFormat.CSV + ) + + @pytest.mark.asyncio + async def test_start_after_stop_raises(self, exports_api): + """Test that start_time >= stop_time raises ValueError.""" + with pytest.raises(ValueError, match="start_time"): + await exports_api.export_by_run( + runs=["run-1"], + output_format=ExportOutputFormat.CSV, + start_time=STOP, + stop_time=START, + ) + + @pytest.mark.asyncio + async def test_start_without_stop_raises(self, exports_api): + """Test that providing start_time without stop_time raises ValueError.""" + with pytest.raises(ValueError, match="both be provided or both omitted"): + await exports_api.export_by_run( + runs=["run-1"], + output_format=ExportOutputFormat.CSV, + start_time=START, + ) + + @pytest.mark.asyncio + async def test_stop_without_start_raises(self, exports_api): + """Test that providing stop_time without start_time raises ValueError.""" + with pytest.raises(ValueError, match="both be provided or both omitted"): + await exports_api.export_by_run( + runs=["run-1"], + output_format=ExportOutputFormat.CSV, + stop_time=STOP, + ) + + class TestExportByAsset: + """Tests for the export_by_asset method.""" + + @pytest.mark.asyncio + async def test_delegates_to_low_level_and_returns_job(self, exports_api): + """Test that export_by_asset passes correct args to low-level and returns a Job.""" + job = await exports_api.export_by_asset( + assets=["asset-1"], start_time=START, stop_time=STOP, output_format=ExportOutputFormat.CSV, + channels=["ch-1", "ch-2"], ) - @pytest.mark.asyncio - async def test_start_after_stop_raises(self, exports_api): - """Test that start_time >= stop_time raises ValueError.""" - with pytest.raises(ValueError, match="start_time"): - await exports_api.export_by_asset( + assert isinstance(job, MagicMock) + exports_api._low_level_client.export_by_asset.assert_awaited_once_with( asset_ids=["asset-1"], - 
start_time=STOP, - stop_time=START, - output_format=ExportOutputFormat.CSV, + start_time=START, + stop_time=STOP, + output_format=ExportOutputFormatProto.EXPORT_OUTPUT_FORMAT_CSV, + channel_ids=["ch-1", "ch-2"], + calculated_channels=None, + use_legacy_format=False, + simplify_channel_names=False, + combine_runs=False, + split_export_by_asset=False, + split_export_by_run=False, ) + @pytest.mark.asyncio + async def test_resolves_asset_objects_to_ids(self, exports_api): + """Test that Asset domain objects are resolved to their IDs.""" + mock_asset = MagicMock(spec=Asset) + mock_asset._id_or_error = "resolved-asset-id" -class TestExportByTimeRange: - """Tests for the export_by_time_range method.""" - - @pytest.mark.asyncio - async def test_builds_correct_request_with_channel_ids(self, exports_api): - """Test request construction with time range and channel IDs.""" - exports_api._low_level_client.export_data = AsyncMock( - return_value=ExportDataResponse(presigned_url="https://download.test/time.zip") - ) - - url = await exports_api.export_by_time_range( - start_time=START, - stop_time=STOP, - output_format=ExportOutputFormat.SUN, - channel_ids=["ch-1"], - ) - - assert url == "https://download.test/time.zip" - req = exports_api._low_level_client.export_data.call_args.kwargs["request"] - assert req.time_range.HasField("start_time") - assert req.time_range.HasField("stop_time") - assert list(req.channel_ids) == ["ch-1"] - assert req.output_format == ExportOutputFormatProto.EXPORT_OUTPUT_FORMAT_SUN - - @pytest.mark.asyncio - async def test_builds_correct_request_with_calc_channels( - self, exports_api, sample_calc_channels - ): - """Test request construction with calculated channels instead of channel IDs.""" - exports_api._low_level_client.export_data = AsyncMock( - return_value=ExportDataResponse(presigned_url="https://download.test/calc.zip") - ) - - await exports_api.export_by_time_range( - start_time=START, - stop_time=STOP, - output_format=ExportOutputFormat.CSV, - 
calculated_channel_configs=sample_calc_channels, - ) - - req = exports_api._low_level_client.export_data.call_args.kwargs["request"] - assert len(req.calculated_channel_configs) == 2 - assert list(req.channel_ids) == [] - - @pytest.mark.asyncio - async def test_no_channels_raises(self, exports_api): - """Test that omitting both channel_ids and calculated_channel_configs raises ValueError.""" - with pytest.raises(ValueError, match=r"channel_ids.*calculated_channel_configs"): + await exports_api.export_by_asset( + assets=[mock_asset, "raw-id"], + start_time=START, + stop_time=STOP, + output_format=ExportOutputFormat.CSV, + ) + + call_kwargs = exports_api._low_level_client.export_by_asset.call_args.kwargs + assert call_kwargs["asset_ids"] == ["resolved-asset-id", "raw-id"] + + @pytest.mark.asyncio + async def test_empty_assets_raises(self, exports_api): + """Test that an empty assets list raises ValueError.""" + with pytest.raises(ValueError, match="assets"): + await exports_api.export_by_asset( + assets=[], + start_time=START, + stop_time=STOP, + output_format=ExportOutputFormat.CSV, + ) + + @pytest.mark.asyncio + async def test_null_asset_raises(self, exports_api): + """Test that an assets list containing an empty string raises ValueError.""" + with pytest.raises(ValueError, match="empty or null"): + await exports_api.export_by_asset( + assets=[""], + start_time=START, + stop_time=STOP, + output_format=ExportOutputFormat.CSV, + ) + + @pytest.mark.asyncio + async def test_start_after_stop_raises(self, exports_api): + """Test that start_time >= stop_time raises ValueError.""" + with pytest.raises(ValueError, match="start_time"): + await exports_api.export_by_asset( + assets=["asset-1"], + start_time=STOP, + stop_time=START, + output_format=ExportOutputFormat.CSV, + ) + + class TestExportByTimeRange: + """Tests for the export_by_time_range method.""" + + @pytest.mark.asyncio + async def test_delegates_to_low_level_with_channels(self, exports_api): + """Test that 
export_by_time_range passes correct args to low-level.""" await exports_api.export_by_time_range( - start_time=START, stop_time=STOP, output_format=ExportOutputFormat.CSV + start_time=START, + stop_time=STOP, + output_format=ExportOutputFormat.SUN, + channels=["ch-1"], ) - @pytest.mark.asyncio - async def test_start_after_stop_raises(self, exports_api): - """Test that start_time >= stop_time raises ValueError.""" - with pytest.raises(ValueError, match="start_time"): + exports_api._low_level_client.export_by_time_range.assert_awaited_once_with( + start_time=START, + stop_time=STOP, + output_format=ExportOutputFormatProto.EXPORT_OUTPUT_FORMAT_SUN, + channel_ids=["ch-1"], + calculated_channels=None, + use_legacy_format=False, + simplify_channel_names=False, + combine_runs=False, + split_export_by_asset=False, + split_export_by_run=False, + ) + + @pytest.mark.asyncio + async def test_delegates_to_low_level_with_calc_channels( + self, exports_api, sample_calc_channels + ): + """Test that calculated channels are passed through to the low-level client.""" await exports_api.export_by_time_range( - start_time=STOP, - stop_time=START, + start_time=START, + stop_time=STOP, output_format=ExportOutputFormat.CSV, - channel_ids=["ch-1"], + calculated_channels=sample_calc_channels, + ) + + call_kwargs = exports_api._low_level_client.export_by_time_range.call_args.kwargs + assert call_kwargs["calculated_channels"] == sample_calc_channels + assert call_kwargs["channel_ids"] == [] + + @pytest.mark.asyncio + async def test_no_channels_raises(self, exports_api): + """Test that omitting both channels and calculated_channels raises ValueError.""" + with pytest.raises(ValueError, match=r"channels.*calculated_channels"): + await exports_api.export_by_time_range( + start_time=START, stop_time=STOP, output_format=ExportOutputFormat.CSV + ) + + @pytest.mark.asyncio + async def test_start_after_stop_raises(self, exports_api): + """Test that start_time >= stop_time raises ValueError.""" + with 
pytest.raises(ValueError, match="start_time"): + await exports_api.export_by_time_range( + start_time=STOP, + stop_time=START, + output_format=ExportOutputFormat.CSV, + channels=["ch-1"], + ) + + class TestWaitUntilComplete: + """Tests for the wait_until_complete method.""" + + @pytest.mark.asyncio + async def test_returns_download_url_on_success(self, exports_api, mock_client): + """Test that a finished job returns the download URL.""" + mock_job = MagicMock(spec=Job) + mock_job._id_or_error = "job-123" + mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=mock_job) + exports_api._low_level_client.get_download_url = AsyncMock( + return_value="https://download.test/export.zip" + ) + + completed_job = MagicMock(spec=Job) + completed_job.job_status = JobStatus.FINISHED + mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=completed_job) + + url = await exports_api.wait_until_complete(job=mock_job) + + assert url == "https://download.test/export.zip" + mock_client.async_.jobs.wait_until_complete.assert_awaited_once_with( + job="job-123", polling_interval_secs=5, timeout_secs=None ) + exports_api._low_level_client.get_download_url.assert_awaited_once_with( + job_id="job-123" + ) + + @pytest.mark.asyncio + async def test_accepts_job_id_string(self, exports_api, mock_client): + """Test that a raw job_id string is accepted.""" + completed_job = MagicMock(spec=Job) + completed_job.job_status = JobStatus.FINISHED + mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=completed_job) + exports_api._low_level_client.get_download_url = AsyncMock( + return_value="https://download.test/export.zip" + ) + + url = await exports_api.wait_until_complete(job="job-456") + + assert url == "https://download.test/export.zip" + mock_client.async_.jobs.wait_until_complete.assert_awaited_once_with( + job="job-456", polling_interval_secs=5, timeout_secs=None + ) + + @pytest.mark.asyncio + async def test_custom_polling_and_timeout(self, 
exports_api, mock_client): + """Test that polling_interval_secs and timeout_secs are forwarded.""" + mock_job = MagicMock(spec=Job) + mock_job._id_or_error = "job-123" + + completed_job = MagicMock(spec=Job) + completed_job.job_status = JobStatus.FINISHED + mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=completed_job) + exports_api._low_level_client.get_download_url = AsyncMock( + return_value="https://download.test/export.zip" + ) + + await exports_api.wait_until_complete( + job=mock_job, polling_interval_secs=1, timeout_secs=10 + ) + + mock_client.async_.jobs.wait_until_complete.assert_awaited_once_with( + job="job-123", polling_interval_secs=1, timeout_secs=10 + ) + + @pytest.mark.asyncio + async def test_failed_job_raises_with_reason(self, exports_api, mock_client): + """Test that a failed job raises RuntimeError with the error message.""" + mock_job = MagicMock(spec=Job) + mock_job._id_or_error = "job-fail" + + completed_job = MagicMock(spec=Job) + completed_job.job_status = JobStatus.FAILED + completed_job.job_status_details = DataExportStatusDetails( + error_message="out of memory" + ) + mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=completed_job) + + with pytest.raises(RuntimeError, match=r"failed.*out of memory"): + await exports_api.wait_until_complete(job=mock_job) + + @pytest.mark.asyncio + async def test_failed_job_raises_without_reason(self, exports_api, mock_client): + """Test that a failed job with no status details still raises RuntimeError.""" + mock_job = MagicMock(spec=Job) + mock_job._id_or_error = "job-fail" + + completed_job = MagicMock(spec=Job) + completed_job.job_status = JobStatus.FAILED + completed_job.job_status_details = None + mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=completed_job) + + with pytest.raises(RuntimeError, match="failed"): + await exports_api.wait_until_complete(job=mock_job) + + @pytest.mark.asyncio + async def test_cancelled_job_raises(self, 
exports_api, mock_client): + """Test that a cancelled job raises RuntimeError.""" + mock_job = MagicMock(spec=Job) + mock_job._id_or_error = "job-cancel" + + completed_job = MagicMock(spec=Job) + completed_job.job_status = JobStatus.CANCELLED + mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=completed_job) + + with pytest.raises(RuntimeError, match="cancelled"): + await exports_api.wait_until_complete(job=mock_job) diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index 90fd5d748..4b2643d46 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -2,59 +2,19 @@ from typing import TYPE_CHECKING -from sift.calculated_channels.v2.calculated_channels_pb2 import ( - CalculatedChannelAbstractChannelReference, -) -from sift.exports.v1.exports_pb2 import ( - AssetsAndTimeRange, - CalculatedChannelConfig, - ExportDataRequest, - ExportOptions, - RunsAndTimeRange, - TimeRange, -) - from sift_client._internal.low_level_wrappers.exports import ExportsLowLevelClient -from sift_client._internal.util.timestamp import to_pb_timestamp from sift_client.resources._base import ResourceBase -from sift_client.sift_types.export import ExportOutputFormat # noqa: TC001 -from sift_client.sift_types.run import Run from sift_client.sift_types.asset import Asset from sift_client.sift_types.channel import Channel -from sift_client.sift_types.calculated_channel import CalculatedChannel, CalculatedChannelCreate +from sift_client.sift_types.export import ExportOutputFormat # noqa: TC001 +from sift_client.sift_types.job import Job +from sift_client.sift_types.run import Run if TYPE_CHECKING: from datetime import datetime from sift_client.client import SiftClient - - -def _build_calc_channels( - calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None, -) -> list[CalculatedChannelConfig]: - if not calculated_channels: - return [] - configs = [] - for cc 
in calculated_channels: - if isinstance(cc, CalculatedChannelCreate): - refs = cc.expression_channel_references or [] - else: - refs = cc.channel_references - configs.append( - CalculatedChannelConfig( - name=cc.name, - expression=cc.expression, - channel_references=[ - CalculatedChannelAbstractChannelReference( - channel_reference=ref.channel_reference, - channel_identifier=ref.channel_identifier, - ) - for ref in refs - ], - units=cc.units, - ) - ) - return configs + from sift_client.sift_types.calculated_channel import CalculatedChannel, CalculatedChannelCreate class ExportsAPIAsync(ResourceBase): @@ -64,28 +24,32 @@ class ExportsAPIAsync(ResourceBase): - ``export_by_run`` - Export data from one or more runs. - ``export_by_asset`` - Export data from one or more assets within a time range. - - ``export_by_time_range`` - Export data within a time range (requires channel_ids or calculated_channel_configs). + - ``export_by_time_range`` - Export data within a time range (requires channels or calculated_channels). - Each method handles the full export lifecycle: initiating the export, polling for - completion (if async), and returning the download URL. + Each method initiates the export and returns a Job handle. Use ``wait_until_complete`` + to poll the job and retrieve the download URL. 
Example:: from sift_client.sift_types.export import ExportOutputFormat # Export by run - url = await client.async_.exports.export_by_run( - run_ids=["run-id-1"], + run = await client.async_.runs.get(run_id="run-id-1") + job = await client.async_.exports.export_by_run( + runs=[run], output_format=ExportOutputFormat.CSV, ) + url = await client.async_.exports.wait_until_complete(job=job) # Export by asset with time range - url = await client.async_.exports.export_by_asset( - asset_ids=["asset-id-1"], + asset = await client.async_.assets.get(asset_id="asset-id-1") + job = await client.async_.exports.export_by_asset( + assets=[asset], start_time=start, stop_time=stop, output_format=ExportOutputFormat.CSV, ) + url = await client.async_.exports.wait_until_complete(job=job) """ def __init__(self, sift_client: SiftClient): @@ -111,35 +75,31 @@ async def export_by_run( combine_runs: bool = False, split_export_by_asset: bool = False, split_export_by_run: bool = False, - polling_interval_secs: int = 5, - timeout_secs: int | None = None, - ) -> str: + ) -> Job: """Export data scoped by one or more runs. + Initiates the export on the server and returns a Job handle. Use + ``wait_until_complete`` to poll for completion and get the download URL. + If no start_time/stop_time are provided, the full time range of each run is used. - If no channel_ids or calculated_channel_configs are provided, all channels from + If no channels or calculated_channels are provided, all channels from the run's assets are included. Args: - run_ids: One or more run IDs to export data from. + runs: One or more Run objects or run IDs to export data from. output_format: The file format for the export (CSV or Sun/WinPlot). start_time: Optional start time to narrow the export within the run(s). stop_time: Optional stop time to narrow the export within the run(s). - channel_ids: Optional list of channel IDs to include. If omitted, all channels are exported. 
- calculated_channel_configs: Optional inline calculated channels to include in the export. + channels: Optional list of Channel objects or channel IDs to include. If omitted, all channels are exported. + calculated_channels: Optional calculated channels to include in the export. Accepts existing CalculatedChannel objects or CalculatedChannelCreate definitions. use_legacy_format: Use legacy channel name display format: ``channel.name (assetName=... runName=... runId=...)``. simplify_channel_names: Remove text preceding last period in channel names, only if the resulting simplified name is unique. combine_runs: Identical channels within the same asset across multiple runs will be combined into a single column. split_export_by_asset: Split each asset into a separate file, with asset name removed from channel name display. split_export_by_run: Split each run into a separate file, with run name removed from channel name display. - polling_interval_secs: Seconds between status polls for async exports. Defaults to 5. - timeout_secs: Maximum seconds to wait for async exports. None means wait indefinitely. Returns: - A presigned download URL for the exported zip file. - - Raises: - TimeoutError: If the export job does not complete within timeout_secs. + A Job handle for the pending export. 
""" if not runs: raise ValueError("'runs' must be a non-empty list of run objects or run ids.") @@ -151,40 +111,26 @@ async def export_by_run( raise ValueError("'start_time' must be before 'stop_time'.") run_ids = [r._id_or_error if isinstance(r, Run) else r for r in runs] - - runs_and_time_range = RunsAndTimeRange(run_ids=run_ids) - if start_time: - runs_and_time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) - if stop_time: - runs_and_time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) - - export_options = ExportOptions( - use_legacy_format=use_legacy_format, - simplify_channel_names=simplify_channel_names, - combine_runs=combine_runs, - split_export_by_asset=split_export_by_asset, - split_export_by_run=split_export_by_run, - ) - channel_ids = ( [c._id_or_error if isinstance(c, Channel) else c for c in channels] if channels else [] ) - request = ExportDataRequest( - runs_and_time_range=runs_and_time_range, + job_id = await self._low_level_client.export_by_run( + run_ids=run_ids, output_format=output_format.value, - export_options=export_options, + start_time=start_time, + stop_time=stop_time, channel_ids=channel_ids, - calculated_channel_configs=_build_calc_channels(calculated_channels), + calculated_channels=calculated_channels, + use_legacy_format=use_legacy_format, + simplify_channel_names=simplify_channel_names, + combine_runs=combine_runs, + split_export_by_asset=split_export_by_asset, + split_export_by_run=split_export_by_run, ) - response = await self._low_level_client.export_data(request=request) - - return await self._await_download_url( - job_id=response.job_id, - polling_interval_secs=polling_interval_secs, - timeout_secs=timeout_secs, - ) + job = await self.client.async_.jobs.get(job_id=job_id) + return self._apply_client_to_instance(job) async def export_by_asset( self, @@ -200,11 +146,12 @@ async def export_by_asset( combine_runs: bool = False, split_export_by_asset: bool = False, split_export_by_run: bool = False, - 
polling_interval_secs: int = 5, - timeout_secs: int | None = None, - ) -> str: + ) -> Job: """Export data scoped by one or more assets within a time range. + Initiates the export on the server and returns a Job handle. Use + ``wait_until_complete`` to poll for completion and get the download URL. + Both start_time and stop_time are required. If no channels or calculated_channels are provided, all channels from the assets are included. @@ -220,14 +167,9 @@ async def export_by_asset( combine_runs: Identical channels within the same asset across multiple runs will be combined into a single column. split_export_by_asset: Split each asset into a separate file, with asset name removed from channel name display. split_export_by_run: Split each run into a separate file, with run name removed from channel name display. - polling_interval_secs: Seconds between status polls for async exports. Defaults to 5. - timeout_secs: Maximum seconds to wait for async exports. None means wait indefinitely. Returns: - A presigned download URL for the exported zip file. - - Raises: - TimeoutError: If the export job does not complete within timeout_secs. + A Job handle for the pending export. 
""" if not assets: raise ValueError("'assets' must be a non-empty list of asset objects or asset IDs.") @@ -237,12 +179,17 @@ async def export_by_asset( raise ValueError("'start_time' must be before 'stop_time'.") asset_ids = [a._id_or_error if isinstance(a, Asset) else a for a in assets] + channel_ids = ( + [c._id_or_error if isinstance(c, Channel) else c for c in channels] if channels else [] + ) - assets_and_time_range = AssetsAndTimeRange(asset_ids=asset_ids) - assets_and_time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) - assets_and_time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) - - export_options = ExportOptions( + job_id = await self._low_level_client.export_by_asset( + asset_ids=asset_ids, + start_time=start_time, + stop_time=stop_time, + output_format=output_format.value, + channel_ids=channel_ids, + calculated_channels=calculated_channels, use_legacy_format=use_legacy_format, simplify_channel_names=simplify_channel_names, combine_runs=combine_runs, @@ -250,25 +197,8 @@ async def export_by_asset( split_export_by_run=split_export_by_run, ) - channel_ids = ( - [c._id_or_error if isinstance(c, Channel) else c for c in channels] if channels else [] - ) - - request = ExportDataRequest( - assets_and_time_range=assets_and_time_range, - channel_ids=channel_ids, - calculated_channel_configs=_build_calc_channels(calculated_channels), - output_format=output_format.value, - export_options=export_options, - ) - - response = await self._low_level_client.export_data(request=request) - - return await self._await_download_url( - job_id=response.job_id, - polling_interval_secs=polling_interval_secs, - timeout_secs=timeout_secs, - ) + job = await self.client.async_.jobs.get(job_id=job_id) + return self._apply_client_to_instance(job) async def export_by_time_range( self, @@ -283,11 +213,12 @@ async def export_by_time_range( combine_runs: bool = False, split_export_by_asset: bool = False, split_export_by_run: bool = False, - polling_interval_secs: int = 5, 
- timeout_secs: int | None = None, - ) -> str: + ) -> Job: """Export data within a time range. + Initiates the export on the server and returns a Job handle. Use + ``wait_until_complete`` to poll for completion and get the download URL. + Both start_time and stop_time are required. At least one of channels or calculated_channels **must** be provided to scope the data, since there are no runs or assets to infer channels from. @@ -303,15 +234,12 @@ async def export_by_time_range( combine_runs: Identical channels within the same asset across multiple runs will be combined into a single column. split_export_by_asset: Split each asset into a separate file, with asset name removed from channel name display. split_export_by_run: Split each run into a separate file, with run name removed from channel name display. - polling_interval_secs: Seconds between status polls for async exports. Defaults to 5. - timeout_secs: Maximum seconds to wait for async exports. None means wait indefinitely. Returns: - A presigned download URL for the exported zip file. + A Job handle for the pending export. Raises: ValueError: If neither channels nor calculated_channels is provided. - TimeoutError: If the export job does not complete within timeout_secs. 
""" if not channels and not calculated_channels: raise ValueError( @@ -321,11 +249,16 @@ async def export_by_time_range( if start_time >= stop_time: raise ValueError("'start_time' must be before 'stop_time'.") - time_range = TimeRange() - time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) - time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) + channel_ids = ( + [c._id_or_error if isinstance(c, Channel) else c for c in channels] if channels else [] + ) - export_options = ExportOptions( + job_id = await self._low_level_client.export_by_time_range( + start_time=start_time, + stop_time=stop_time, + output_format=output_format.value, + channel_ids=channel_ids, + calculated_channels=calculated_channels, use_legacy_format=use_legacy_format, simplify_channel_names=simplify_channel_names, combine_runs=combine_runs, @@ -333,43 +266,50 @@ async def export_by_time_range( split_export_by_run=split_export_by_run, ) - channel_ids = ( - [c._id_or_error if isinstance(c, Channel) else c for c in channels] if channels else [] - ) + job = await self.client.async_.jobs.get(job_id=job_id) + return self._apply_client_to_instance(job) - request = ExportDataRequest( - time_range=time_range, - channel_ids=channel_ids, - calculated_channel_configs=_build_calc_channels(calculated_channels), - output_format=output_format.value, - export_options=export_options, - ) + async def wait_until_complete( + self, + *, + job: Job | str, + polling_interval_secs: int = 5, + timeout_secs: int | None = None, + ) -> str: + """Wait for an export job to complete and return the download URL. - response = await self._low_level_client.export_data(request=request) + Polls the job status at the given interval until the job is FINISHED, + FAILED, or CANCELLED. - return await self._await_download_url( - job_id=response.job_id, - polling_interval_secs=polling_interval_secs, - timeout_secs=timeout_secs, - ) + Args: + job: The export Job or job ID to wait for. 
+ polling_interval_secs: Seconds between status polls. Defaults to 5. + timeout_secs: Maximum seconds to wait. If None, polls indefinitely. - async def _await_download_url( - self, job_id: str, polling_interval_secs: int = 5, timeout_secs: int | None = None - ) -> str: - """Poll a background export job until complete, then return the download URL.""" + Returns: + A presigned download URL for the exported zip file. + + Raises: + RuntimeError: If the export job fails or is cancelled. + TimeoutError: If the export job does not complete within timeout_secs. + """ from sift_client.sift_types.job import DataExportStatusDetails, JobStatus - job = await self.client.async_.jobs.wait_until_complete( - job=job_id, polling_interval_secs=polling_interval_secs, timeout_secs=timeout_secs + job_id = job._id_or_error if isinstance(job, Job) else job + + completed_job = await self.client.async_.jobs.wait_until_complete( + job=job_id, + polling_interval_secs=polling_interval_secs, + timeout_secs=timeout_secs, ) - if job.job_status == JobStatus.FAILED: + if completed_job.job_status == JobStatus.FAILED: reason = "" if ( - isinstance(job.job_status_details, DataExportStatusDetails) - and job.job_status_details.error_message + isinstance(completed_job.job_status_details, DataExportStatusDetails) + and completed_job.job_status_details.error_message ): - reason = f": {job.job_status_details.error_message}" - raise RuntimeError(f"Export job '{job_id}' failed {reason}") - if job.job_status == JobStatus.CANCELLED: + reason = f": {completed_job.job_status_details.error_message}" + raise RuntimeError(f"Export job '{job_id}' failed: {reason}") + if completed_job.job_status == JobStatus.CANCELLED: raise RuntimeError(f"Export job '{job_id}' was cancelled.") return await self._low_level_client.get_download_url(job_id=job_id) diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi index 222b36aea..9df53f9e9 100644 --- 
a/python/lib/sift_client/resources/sync_stubs/__init__.pyi +++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi @@ -21,7 +21,7 @@ if TYPE_CHECKING: CalculatedChannelUpdate, ) from sift_client.sift_types.channel import Channel - from sift_client.sift_types.export import ExportCalculatedChannel, ExportOutputFormat + from sift_client.sift_types.export import ExportOutputFormat from sift_client.sift_types.file_attachment import ( FileAttachment, FileAttachmentUpdate, @@ -547,28 +547,32 @@ class ExportsAPI: - ``export_by_run`` - Export data from one or more runs. - ``export_by_asset`` - Export data from one or more assets within a time range. - - ``export_by_time_range`` - Export data within a time range (requires channel_ids or calculated_channel_configs). + - ``export_by_time_range`` - Export data within a time range (requires channels or calculated_channels). - Each method handles the full export lifecycle: initiating the export, polling for - completion (if async), and returning the download URL. + Each method initiates the export and returns a Job handle. Use ``wait_until_complete`` + to poll the job and retrieve the download URL. 
Example:: from sift_client.sift_types.export import ExportOutputFormat # Export by run - url = await client.async_.exports.export_by_run( - run_ids=["run-id-1"], + run = await client.async_.runs.get(run_id="run-id-1") + job = await client.async_.exports.export_by_run( + runs=[run], output_format=ExportOutputFormat.CSV, ) + url = await client.async_.exports.wait_until_complete(job=job) # Export by asset with time range - url = await client.async_.exports.export_by_asset( - asset_ids=["asset-id-1"], + asset = await client.async_.assets.get(asset_id="asset-id-1") + job = await client.async_.exports.export_by_asset( + assets=[asset], start_time=start, stop_time=stop, output_format=ExportOutputFormat.CSV, ) + url = await client.async_.exports.wait_until_complete(job=job) """ def __init__(self, sift_client: SiftClient): @@ -583,91 +587,83 @@ class ExportsAPI: def export_by_asset( self, *, - asset_ids: list[str], + assets: list[str | Asset], start_time: datetime, stop_time: datetime, output_format: ExportOutputFormat, - channel_ids: list[str] | None = None, - calculated_channel_configs: list[ExportCalculatedChannel] | None = None, + channels: list[str | Channel] | None = None, + calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, use_legacy_format: bool = False, simplify_channel_names: bool = False, combine_runs: bool = False, split_export_by_asset: bool = False, split_export_by_run: bool = False, - polling_interval_secs: int = 5, - timeout_secs: int | None = None, - ) -> str: + ) -> Job: """Export data scoped by one or more assets within a time range. - Both start_time and stop_time are required. If no channel_ids or - calculated_channel_configs are provided, all channels from the assets are included. + Initiates the export on the server and returns a Job handle. Use + ``wait_until_complete`` to poll for completion and get the download URL. + + Both start_time and stop_time are required. 
If no channels or + calculated_channels are provided, all channels from the assets are included. Args: - asset_ids: One or more asset IDs to export data from. + assets: One or more Asset objects or asset IDs to export data from. start_time: Start of the time range to export. stop_time: End of the time range to export. output_format: The file format for the export (CSV, Parquet, or Sun/WinPlot). - channel_ids: Optional list of channel IDs to include. If omitted, all channels are exported. - calculated_channel_configs: Optional inline calculated channels to include in the export. + channels: Optional list of Channel objects or channel IDs to include. If omitted, all channels are exported. + calculated_channels: Optional calculated channels to include in the export. Accepts existing CalculatedChannel objects or CalculatedChannelCreate definitions. use_legacy_format: Use legacy channel name display format: ``channel.name (assetName=... runName=... runId=...)``. simplify_channel_names: Remove text preceding last period in channel names, only if the resulting simplified name is unique. combine_runs: Identical channels within the same asset across multiple runs will be combined into a single column. split_export_by_asset: Split each asset into a separate file, with asset name removed from channel name display. split_export_by_run: Split each run into a separate file, with run name removed from channel name display. - polling_interval_secs: Seconds between status polls for async exports. Defaults to 5. - timeout_secs: Maximum seconds to wait for async exports. None means wait indefinitely. Returns: - A presigned download URL for the exported zip file. - - Raises: - TimeoutError: If the export job does not complete within timeout_secs. + A Job handle for the pending export. """ ... 
def export_by_run( self, *, - run_ids: list[str], + runs: list[str | Run], output_format: ExportOutputFormat, start_time: datetime | None = None, stop_time: datetime | None = None, - channel_ids: list[str] | None = None, - calculated_channel_configs: list[ExportCalculatedChannel] | None = None, + channels: list[str | Channel] | None = None, + calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, use_legacy_format: bool = False, simplify_channel_names: bool = False, combine_runs: bool = False, split_export_by_asset: bool = False, split_export_by_run: bool = False, - polling_interval_secs: int = 5, - timeout_secs: int | None = None, - ) -> str: + ) -> Job: """Export data scoped by one or more runs. + Initiates the export on the server and returns a Job handle. Use + ``wait_until_complete`` to poll for completion and get the download URL. + If no start_time/stop_time are provided, the full time range of each run is used. - If no channel_ids or calculated_channel_configs are provided, all channels from + If no channels or calculated_channels are provided, all channels from the run's assets are included. Args: - run_ids: One or more run IDs to export data from. + runs: One or more Run objects or run IDs to export data from. output_format: The file format for the export (CSV or Sun/WinPlot). start_time: Optional start time to narrow the export within the run(s). stop_time: Optional stop time to narrow the export within the run(s). - channel_ids: Optional list of channel IDs to include. If omitted, all channels are exported. - calculated_channel_configs: Optional inline calculated channels to include in the export. + channels: Optional list of Channel objects or channel IDs to include. If omitted, all channels are exported. + calculated_channels: Optional calculated channels to include in the export. Accepts existing CalculatedChannel objects or CalculatedChannelCreate definitions. 
use_legacy_format: Use legacy channel name display format: ``channel.name (assetName=... runName=... runId=...)``. simplify_channel_names: Remove text preceding last period in channel names, only if the resulting simplified name is unique. combine_runs: Identical channels within the same asset across multiple runs will be combined into a single column. split_export_by_asset: Split each asset into a separate file, with asset name removed from channel name display. split_export_by_run: Split each run into a separate file, with run name removed from channel name display. - polling_interval_secs: Seconds between status polls for async exports. Defaults to 5. - timeout_secs: Maximum seconds to wait for async exports. None means wait indefinitely. Returns: - A presigned download URL for the exported zip file. - - Raises: - TimeoutError: If the export job does not complete within timeout_secs. + A Job handle for the pending export. """ ... @@ -677,41 +673,61 @@ class ExportsAPI: start_time: datetime, stop_time: datetime, output_format: ExportOutputFormat, - channel_ids: list[str] | None = None, - calculated_channel_configs: list[ExportCalculatedChannel] | None = None, + channels: list[str | Channel] | None = None, + calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, use_legacy_format: bool = False, simplify_channel_names: bool = False, combine_runs: bool = False, split_export_by_asset: bool = False, split_export_by_run: bool = False, - polling_interval_secs: int = 5, - timeout_secs: int | None = None, - ) -> str: + ) -> Job: """Export data within a time range. - Both start_time and stop_time are required. At least one of channel_ids or - calculated_channel_configs **must** be provided to scope the data, since there + Initiates the export on the server and returns a Job handle. Use + ``wait_until_complete`` to poll for completion and get the download URL. + + Both start_time and stop_time are required. 
At least one of channels or + calculated_channels **must** be provided to scope the data, since there are no runs or assets to infer channels from. Args: start_time: Start of the time range to export. stop_time: End of the time range to export. output_format: The file format for the export (CSV, Parquet, or Sun/WinPlot). - channel_ids: List of channel IDs to include in the export. - calculated_channel_configs: Inline calculated channels to include in the export. + channels: List of Channel objects or channel IDs to include in the export. + calculated_channels: Calculated channels to include in the export. Accepts existing CalculatedChannel objects or CalculatedChannelCreate definitions. use_legacy_format: Use legacy channel name display format: ``channel.name (assetName=... runName=... runId=...)``. simplify_channel_names: Remove text preceding last period in channel names, only if the resulting simplified name is unique. combine_runs: Identical channels within the same asset across multiple runs will be combined into a single column. split_export_by_asset: Split each asset into a separate file, with asset name removed from channel name display. split_export_by_run: Split each run into a separate file, with run name removed from channel name display. - polling_interval_secs: Seconds between status polls for async exports. Defaults to 5. - timeout_secs: Maximum seconds to wait for async exports. None means wait indefinitely. + + Returns: + A Job handle for the pending export. + + Raises: + ValueError: If neither channels nor calculated_channels is provided. + """ + ... + + def wait_until_complete( + self, *, job: Job | str, polling_interval_secs: int = 5, timeout_secs: int | None = None + ) -> str: + """Wait for an export job to complete and return the download URL. + + Polls the job status at the given interval until the job is FINISHED, + FAILED, or CANCELLED. + + Args: + job: The export Job or job ID to wait for. 
+ polling_interval_secs: Seconds between status polls. Defaults to 5. + timeout_secs: Maximum seconds to wait. If None, polls indefinitely. Returns: A presigned download URL for the exported zip file. Raises: - ValueError: If neither channel_ids nor calculated_channel_configs is provided. + RuntimeError: If the export job fails or is cancelled. TimeoutError: If the export job does not complete within timeout_secs. """ ... From 5a5467868ea286b9a2220bd3ce522942ccfb6d4c Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Sun, 15 Mar 2026 15:46:07 -0700 Subject: [PATCH 13/53] python(fix): low-level-client accepts ExportOutputFormat enum --- .../_internal/low_level_wrappers/exports.py | 15 ++++++++------- .../sift_client/_tests/resources/test_exports.py | 10 ++++------ python/lib/sift_client/resources/exports.py | 6 +++--- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/exports.py b/python/lib/sift_client/_internal/low_level_wrappers/exports.py index 789807e61..c079c041c 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/exports.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/exports.py @@ -21,6 +21,7 @@ from sift_client._internal.low_level_wrappers.base import LowLevelClientBase from sift_client._internal.util.timestamp import to_pb_timestamp from sift_client.sift_types.calculated_channel import CalculatedChannel, CalculatedChannelCreate +from sift_client.sift_types.export import ExportOutputFormat from sift_client.transport import WithGrpcClient if TYPE_CHECKING: @@ -44,7 +45,7 @@ def _build_calc_channel_configs( configs.append( CalculatedChannelConfig( name=cc.name, - expression=cc.expression, + expression=cc.expression or "", channel_references=[ CalculatedChannelAbstractChannelReference( channel_reference=ref.channel_reference, @@ -94,7 +95,7 @@ async def export_by_run( self, *, run_ids: list[str], - output_format: int, + output_format: ExportOutputFormat, start_time: datetime 
| None = None, stop_time: datetime | None = None, channel_ids: list[str] | None = None, @@ -131,7 +132,7 @@ async def export_by_run( request = ExportDataRequest( runs_and_time_range=runs_and_time_range, - output_format=output_format, + output_format=output_format.value, export_options=_build_export_options( use_legacy_format=use_legacy_format, simplify_channel_names=simplify_channel_names, @@ -152,7 +153,7 @@ async def export_by_asset( asset_ids: list[str], start_time: datetime, stop_time: datetime, - output_format: int, + output_format: ExportOutputFormat, channel_ids: list[str] | None = None, calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, use_legacy_format: bool = False, @@ -185,7 +186,7 @@ async def export_by_asset( request = ExportDataRequest( assets_and_time_range=assets_and_time_range, - output_format=output_format, + output_format=output_format.value, export_options=_build_export_options( use_legacy_format=use_legacy_format, simplify_channel_names=simplify_channel_names, @@ -205,7 +206,7 @@ async def export_by_time_range( *, start_time: datetime, stop_time: datetime, - output_format: int, + output_format: ExportOutputFormat, channel_ids: list[str] | None = None, calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, use_legacy_format: bool = False, @@ -237,7 +238,7 @@ async def export_by_time_range( request = ExportDataRequest( time_range=time_range, - output_format=output_format, + output_format=output_format.value, export_options=_build_export_options( use_legacy_format=use_legacy_format, simplify_channel_names=simplify_channel_names, diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index 8b167ba64..060495f78 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -13,8 +13,6 @@ from unittest.mock import AsyncMock, MagicMock, patch 
import pytest -from sift.exports.v1.exports_pb2 import ExportOutputFormat as ExportOutputFormatProto - from sift_client._internal.low_level_wrappers.exports import _build_calc_channel_configs from sift_client.resources.exports import ExportsAPIAsync from sift_client.sift_types.asset import Asset @@ -140,7 +138,7 @@ async def test_delegates_to_low_level_and_returns_job(self, exports_api): assert isinstance(job, MagicMock) exports_api._low_level_client.export_by_run.assert_awaited_once_with( run_ids=["run-1", "run-2"], - output_format=ExportOutputFormatProto.EXPORT_OUTPUT_FORMAT_CSV, + output_format=ExportOutputFormat.CSV, start_time=START, stop_time=STOP, channel_ids=["ch-1"], @@ -162,7 +160,7 @@ async def test_minimal_args(self, exports_api): exports_api._low_level_client.export_by_run.assert_awaited_once_with( run_ids=["run-1"], - output_format=ExportOutputFormatProto.EXPORT_OUTPUT_FORMAT_SUN, + output_format=ExportOutputFormat.SUN, start_time=None, stop_time=None, channel_ids=[], @@ -279,7 +277,7 @@ async def test_delegates_to_low_level_and_returns_job(self, exports_api): asset_ids=["asset-1"], start_time=START, stop_time=STOP, - output_format=ExportOutputFormatProto.EXPORT_OUTPUT_FORMAT_CSV, + output_format=ExportOutputFormat.CSV, channel_ids=["ch-1", "ch-2"], calculated_channels=None, use_legacy_format=False, @@ -354,7 +352,7 @@ async def test_delegates_to_low_level_with_channels(self, exports_api): exports_api._low_level_client.export_by_time_range.assert_awaited_once_with( start_time=START, stop_time=STOP, - output_format=ExportOutputFormatProto.EXPORT_OUTPUT_FORMAT_SUN, + output_format=ExportOutputFormat.SUN, channel_ids=["ch-1"], calculated_channels=None, use_legacy_format=False, diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index 4b2643d46..53c81f4c4 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -117,7 +117,7 @@ async def export_by_run( 
job_id = await self._low_level_client.export_by_run( run_ids=run_ids, - output_format=output_format.value, + output_format=output_format, start_time=start_time, stop_time=stop_time, channel_ids=channel_ids, @@ -187,7 +187,7 @@ async def export_by_asset( asset_ids=asset_ids, start_time=start_time, stop_time=stop_time, - output_format=output_format.value, + output_format=output_format, channel_ids=channel_ids, calculated_channels=calculated_channels, use_legacy_format=use_legacy_format, @@ -256,7 +256,7 @@ async def export_by_time_range( job_id = await self._low_level_client.export_by_time_range( start_time=start_time, stop_time=stop_time, - output_format=output_format.value, + output_format=output_format, channel_ids=channel_ids, calculated_channels=calculated_channels, use_legacy_format=use_legacy_format, From ca1caf0866b287fcc091557948c38e6610ff09e4 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Sun, 15 Mar 2026 15:47:25 -0700 Subject: [PATCH 14/53] python(fix): linting --- python/lib/sift_client/_internal/low_level_wrappers/exports.py | 2 +- python/lib/sift_client/_tests/resources/test_exports.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/exports.py b/python/lib/sift_client/_internal/low_level_wrappers/exports.py index c079c041c..77daedb63 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/exports.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/exports.py @@ -21,12 +21,12 @@ from sift_client._internal.low_level_wrappers.base import LowLevelClientBase from sift_client._internal.util.timestamp import to_pb_timestamp from sift_client.sift_types.calculated_channel import CalculatedChannel, CalculatedChannelCreate -from sift_client.sift_types.export import ExportOutputFormat from sift_client.transport import WithGrpcClient if TYPE_CHECKING: from datetime import datetime + from sift_client.sift_types.export import ExportOutputFormat from 
sift_client.transport.grpc_transport import GrpcClient diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index 060495f78..2f0eaf5c4 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -13,6 +13,7 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest + from sift_client._internal.low_level_wrappers.exports import _build_calc_channel_configs from sift_client.resources.exports import ExportsAPIAsync from sift_client.sift_types.asset import Asset From dae2ae59c80be39db883162093729e782d37395a Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Mon, 16 Mar 2026 11:25:19 -0700 Subject: [PATCH 15/53] python(refactor): merge low-level export methods into single method, updated unit tests --- .../_internal/low_level_wrappers/exports.py | 184 ++++-------------- .../_tests/resources/test_exports.py | 24 ++- python/lib/sift_client/resources/exports.py | 13 +- 3 files changed, 53 insertions(+), 168 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/exports.py b/python/lib/sift_client/_internal/low_level_wrappers/exports.py index 77daedb63..d1be09558 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/exports.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/exports.py @@ -59,24 +59,6 @@ def _build_calc_channel_configs( return configs -def _build_export_options( - *, - use_legacy_format: bool = False, - simplify_channel_names: bool = False, - combine_runs: bool = False, - split_export_by_asset: bool = False, - split_export_by_run: bool = False, -) -> ExportOptions: - """Build an ExportOptions proto from primitive flags.""" - return ExportOptions( - use_legacy_format=use_legacy_format, - simplify_channel_names=simplify_channel_names, - combine_runs=combine_runs, - split_export_by_asset=split_export_by_asset, - split_export_by_run=split_export_by_run, - ) - - class 
ExportsLowLevelClient(LowLevelClientBase, WithGrpcClient): """Low-level client for the ExportsAPI. @@ -91,11 +73,12 @@ def __init__(self, grpc_client: GrpcClient): """ super().__init__(grpc_client) - async def export_by_run( + async def export_data( self, *, - run_ids: list[str], output_format: ExportOutputFormat, + run_ids: list[str] | None = None, + asset_ids: list[str] | None = None, start_time: datetime | None = None, stop_time: datetime | None = None, channel_ids: list[str] | None = None, @@ -106,69 +89,20 @@ async def export_by_run( split_export_by_asset: bool = False, split_export_by_run: bool = False, ) -> str: - """Initiate a data export scoped by runs. - - Args: - run_ids: List of run IDs to export. - output_format: The proto enum value for the export format. - start_time: Optional start time to narrow the export. - stop_time: Optional stop time to narrow the export. - channel_ids: Optional list of channel IDs to include. - calculated_channels: Optional calculated channel objects to include. - use_legacy_format: Use legacy channel name display format. - simplify_channel_names: Simplify channel names if unique. - combine_runs: Combine identical channels across runs. - split_export_by_asset: Split export by asset. - split_export_by_run: Split export by run. + """Initiate a data export. - Returns: - The job ID for the background export. 
- """ - runs_and_time_range = RunsAndTimeRange(run_ids=run_ids) - if start_time: - runs_and_time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) - if stop_time: - runs_and_time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) - - request = ExportDataRequest( - runs_and_time_range=runs_and_time_range, - output_format=output_format.value, - export_options=_build_export_options( - use_legacy_format=use_legacy_format, - simplify_channel_names=simplify_channel_names, - combine_runs=combine_runs, - split_export_by_asset=split_export_by_asset, - split_export_by_run=split_export_by_run, - ), - channel_ids=channel_ids or [], - calculated_channel_configs=_build_calc_channel_configs(calculated_channels), - ) - - response = await self._export_data(request) - return response.job_id - - async def export_by_asset( - self, - *, - asset_ids: list[str], - start_time: datetime, - stop_time: datetime, - output_format: ExportOutputFormat, - channel_ids: list[str] | None = None, - calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, - use_legacy_format: bool = False, - simplify_channel_names: bool = False, - combine_runs: bool = False, - split_export_by_asset: bool = False, - split_export_by_run: bool = False, - ) -> str: - """Initiate a data export scoped by assets and a time range. + Builds the ExportDataRequest proto and makes the gRPC call. The export + scope is determined by which ID list is provided: + - run_ids: export by run + - asset_ids: export by asset (requires start_time/stop_time) + - neither: export by time range (requires start_time/stop_time) Args: - asset_ids: List of asset IDs to export. - start_time: Start of the time range. - stop_time: End of the time range. - output_format: The proto enum value for the export format. + output_format: The export format enum. + run_ids: Optional list of run IDs (export by run). + asset_ids: Optional list of asset IDs (export by asset). + start_time: Optional start time for the export. 
+ stop_time: Optional stop time for the export. channel_ids: Optional list of channel IDs to include. calculated_channels: Optional calculated channel objects to include. use_legacy_format: Use legacy channel name display format. @@ -180,14 +114,9 @@ async def export_by_asset( Returns: The job ID for the background export. """ - assets_and_time_range = AssetsAndTimeRange(asset_ids=asset_ids) - assets_and_time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) - assets_and_time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) - request = ExportDataRequest( - assets_and_time_range=assets_and_time_range, output_format=output_format.value, - export_options=_build_export_options( + export_options=ExportOptions( use_legacy_format=use_legacy_format, simplify_channel_names=simplify_channel_names, combine_runs=combine_runs, @@ -198,66 +127,35 @@ async def export_by_asset( calculated_channel_configs=_build_calc_channel_configs(calculated_channels), ) - response = await self._export_data(request) - return response.job_id - - async def export_by_time_range( - self, - *, - start_time: datetime, - stop_time: datetime, - output_format: ExportOutputFormat, - channel_ids: list[str] | None = None, - calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, - use_legacy_format: bool = False, - simplify_channel_names: bool = False, - combine_runs: bool = False, - split_export_by_asset: bool = False, - split_export_by_run: bool = False, - ) -> str: - """Initiate a data export scoped by a time range. + if run_ids is not None: + runs_and_time_range = RunsAndTimeRange(run_ids=run_ids) + if start_time: + runs_and_time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) + if stop_time: + runs_and_time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) + request.runs_and_time_range.CopyFrom(runs_and_time_range) - Args: - start_time: Start of the time range. - stop_time: End of the time range. 
- output_format: The proto enum value for the export format. - channel_ids: Optional list of channel IDs to include. - calculated_channels: Optional calculated channel objects to include. - use_legacy_format: Use legacy channel name display format. - simplify_channel_names: Simplify channel names if unique. - combine_runs: Combine identical channels across runs. - split_export_by_asset: Split export by asset. - split_export_by_run: Split export by run. + elif asset_ids is not None: + assets_and_time_range = AssetsAndTimeRange(asset_ids=asset_ids) + assets_and_time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) + assets_and_time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) + request.assets_and_time_range.CopyFrom(assets_and_time_range) - Returns: - The job ID for the background export. - """ - time_range = TimeRange() - time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) - time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) - - request = ExportDataRequest( - time_range=time_range, - output_format=output_format.value, - export_options=_build_export_options( - use_legacy_format=use_legacy_format, - simplify_channel_names=simplify_channel_names, - combine_runs=combine_runs, - split_export_by_asset=split_export_by_asset, - split_export_by_run=split_export_by_run, - ), - channel_ids=channel_ids or [], - calculated_channel_configs=_build_calc_channel_configs(calculated_channels), - ) + else: + time_range = TimeRange() + time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) + time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) + request.time_range.CopyFrom(time_range) - response = await self._export_data(request) + response = await self._grpc_client.get_stub(ExportServiceStub).ExportData(request) + response = cast("ExportDataResponse", response) return response.job_id async def get_download_url(self, job_id: str) -> str: """Get the download URL for a background export job. Args: - job_id: The job ID returned from an export method. 
+ job_id: The job ID returned from export_data. Returns: The presigned URL to download the exported zip file. @@ -266,15 +164,3 @@ async def get_download_url(self, job_id: str) -> str: response = await self._grpc_client.get_stub(ExportServiceStub).GetDownloadUrl(request) response = cast("GetDownloadUrlResponse", response) return response.presigned_url - - async def _export_data(self, request: ExportDataRequest) -> ExportDataResponse: - """Make the ExportData gRPC call. - - Args: - request: The ExportDataRequest proto message. - - Returns: - The ExportDataResponse. - """ - response = await self._grpc_client.get_stub(ExportServiceStub).ExportData(request) - return cast("ExportDataResponse", response) diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index 2f0eaf5c4..00bdde579 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -52,10 +52,8 @@ def exports_api(mock_client, mock_job): with patch("sift_client.resources.exports.ExportsLowLevelClient", autospec=True) as mock_ll: api = ExportsAPIAsync(mock_client) api._low_level_client = mock_ll.return_value - # Default: low-level export methods return a job_id - api._low_level_client.export_by_run = AsyncMock(return_value="job-123") - api._low_level_client.export_by_asset = AsyncMock(return_value="job-123") - api._low_level_client.export_by_time_range = AsyncMock(return_value="job-123") + # Default: low-level export_data returns a job_id + api._low_level_client.export_data = AsyncMock(return_value="job-123") # Default: jobs.get returns a mock Job mock_client.async_.jobs.get = AsyncMock(return_value=mock_job) return api @@ -137,7 +135,7 @@ async def test_delegates_to_low_level_and_returns_job(self, exports_api): ) assert isinstance(job, MagicMock) - exports_api._low_level_client.export_by_run.assert_awaited_once_with( + 
exports_api._low_level_client.export_data.assert_awaited_once_with( run_ids=["run-1", "run-2"], output_format=ExportOutputFormat.CSV, start_time=START, @@ -159,7 +157,7 @@ async def test_minimal_args(self, exports_api): output_format=ExportOutputFormat.SUN, ) - exports_api._low_level_client.export_by_run.assert_awaited_once_with( + exports_api._low_level_client.export_data.assert_awaited_once_with( run_ids=["run-1"], output_format=ExportOutputFormat.SUN, start_time=None, @@ -182,7 +180,7 @@ async def test_with_calculated_channels(self, exports_api, sample_calc_channels) calculated_channels=sample_calc_channels, ) - call_kwargs = exports_api._low_level_client.export_by_run.call_args.kwargs + call_kwargs = exports_api._low_level_client.export_data.call_args.kwargs assert call_kwargs["calculated_channels"] == sample_calc_channels @pytest.mark.asyncio @@ -196,7 +194,7 @@ async def test_resolves_run_objects_to_ids(self, exports_api): output_format=ExportOutputFormat.CSV, ) - call_kwargs = exports_api._low_level_client.export_by_run.call_args.kwargs + call_kwargs = exports_api._low_level_client.export_data.call_args.kwargs assert call_kwargs["run_ids"] == ["resolved-run-id", "raw-id"] @pytest.mark.asyncio @@ -211,7 +209,7 @@ async def test_resolves_channel_objects_to_ids(self, exports_api): channels=[mock_channel, "raw-ch-id"], ) - call_kwargs = exports_api._low_level_client.export_by_run.call_args.kwargs + call_kwargs = exports_api._low_level_client.export_data.call_args.kwargs assert call_kwargs["channel_ids"] == ["resolved-ch-id", "raw-ch-id"] @pytest.mark.asyncio @@ -274,7 +272,7 @@ async def test_delegates_to_low_level_and_returns_job(self, exports_api): ) assert isinstance(job, MagicMock) - exports_api._low_level_client.export_by_asset.assert_awaited_once_with( + exports_api._low_level_client.export_data.assert_awaited_once_with( asset_ids=["asset-1"], start_time=START, stop_time=STOP, @@ -301,7 +299,7 @@ async def test_resolves_asset_objects_to_ids(self, 
exports_api): output_format=ExportOutputFormat.CSV, ) - call_kwargs = exports_api._low_level_client.export_by_asset.call_args.kwargs + call_kwargs = exports_api._low_level_client.export_data.call_args.kwargs assert call_kwargs["asset_ids"] == ["resolved-asset-id", "raw-id"] @pytest.mark.asyncio @@ -350,7 +348,7 @@ async def test_delegates_to_low_level_with_channels(self, exports_api): channels=["ch-1"], ) - exports_api._low_level_client.export_by_time_range.assert_awaited_once_with( + exports_api._low_level_client.export_data.assert_awaited_once_with( start_time=START, stop_time=STOP, output_format=ExportOutputFormat.SUN, @@ -375,7 +373,7 @@ async def test_delegates_to_low_level_with_calc_channels( calculated_channels=sample_calc_channels, ) - call_kwargs = exports_api._low_level_client.export_by_time_range.call_args.kwargs + call_kwargs = exports_api._low_level_client.export_data.call_args.kwargs assert call_kwargs["calculated_channels"] == sample_calc_channels assert call_kwargs["channel_ids"] == [] diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index 53c81f4c4..f36b7d669 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -115,7 +115,7 @@ async def export_by_run( [c._id_or_error if isinstance(c, Channel) else c for c in channels] if channels else [] ) - job_id = await self._low_level_client.export_by_run( + job_id = await self._low_level_client.export_data( run_ids=run_ids, output_format=output_format, start_time=start_time, @@ -183,7 +183,7 @@ async def export_by_asset( [c._id_or_error if isinstance(c, Channel) else c for c in channels] if channels else [] ) - job_id = await self._low_level_client.export_by_asset( + job_id = await self._low_level_client.export_data( asset_ids=asset_ids, start_time=start_time, stop_time=stop_time, @@ -253,7 +253,7 @@ async def export_by_time_range( [c._id_or_error if isinstance(c, Channel) else c for c in channels] 
if channels else [] ) - job_id = await self._low_level_client.export_by_time_range( + job_id = await self._low_level_client.export_data( start_time=start_time, stop_time=stop_time, output_format=output_format, @@ -303,13 +303,14 @@ async def wait_until_complete( timeout_secs=timeout_secs, ) if completed_job.job_status == JobStatus.FAILED: - reason = "" if ( isinstance(completed_job.job_status_details, DataExportStatusDetails) and completed_job.job_status_details.error_message ): - reason = f": {completed_job.job_status_details.error_message}" - raise RuntimeError(f"Export job '{job_id}' failed: {reason}") + raise RuntimeError( + f"Export job '{job_id}' failed: {completed_job.job_status_details.error_message}" + ) + raise RuntimeError(f"Export job '{job_id}' failed.") if completed_job.job_status == JobStatus.CANCELLED: raise RuntimeError(f"Export job '{job_id}' was cancelled.") return await self._low_level_client.get_download_url(job_id=job_id) From 60f2cff6929a84398de35a380de05ae7bc533087 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Mon, 16 Mar 2026 11:41:31 -0700 Subject: [PATCH 16/53] python(fix): add assertions for datetime to resolve mypy errors --- python/lib/sift_client/_internal/low_level_wrappers/exports.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/exports.py b/python/lib/sift_client/_internal/low_level_wrappers/exports.py index d1be09558..4f6eb7593 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/exports.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/exports.py @@ -136,12 +136,14 @@ async def export_data( request.runs_and_time_range.CopyFrom(runs_and_time_range) elif asset_ids is not None: + assert start_time is not None and stop_time is not None assets_and_time_range = AssetsAndTimeRange(asset_ids=asset_ids) assets_and_time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) assets_and_time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) 
request.assets_and_time_range.CopyFrom(assets_and_time_range) else: + assert start_time is not None and stop_time is not None time_range = TimeRange() time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) From 3ef402fb1ebb53e06cfb5c3eeaef5f5eeed6e37b Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Mon, 16 Mar 2026 11:42:36 -0700 Subject: [PATCH 17/53] linting --- .../lib/sift_client/_internal/low_level_wrappers/exports.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/exports.py b/python/lib/sift_client/_internal/low_level_wrappers/exports.py index 4f6eb7593..781775eca 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/exports.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/exports.py @@ -136,14 +136,16 @@ async def export_data( request.runs_and_time_range.CopyFrom(runs_and_time_range) elif asset_ids is not None: - assert start_time is not None and stop_time is not None + assert start_time is not None + assert stop_time is not None assets_and_time_range = AssetsAndTimeRange(asset_ids=asset_ids) assets_and_time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) assets_and_time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) request.assets_and_time_range.CopyFrom(assets_and_time_range) else: - assert start_time is not None and stop_time is not None + assert start_time is not None + assert stop_time is not None time_range = TimeRange() time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) From 1420dda57570425e7f5ee4c73f737732727891c6 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Mon, 16 Mar 2026 13:14:17 -0700 Subject: [PATCH 18/53] python(refactor): remove redundant code --- .../_internal/low_level_wrappers/exports.py | 13 +++++++++---- python/lib/sift_client/resources/exports.py | 11 ++++------- 2 files changed, 13 insertions(+), 
11 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/exports.py b/python/lib/sift_client/_internal/low_level_wrappers/exports.py index 781775eca..680680772 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/exports.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/exports.py @@ -136,16 +136,21 @@ async def export_data( request.runs_and_time_range.CopyFrom(runs_and_time_range) elif asset_ids is not None: - assert start_time is not None - assert stop_time is not None + if start_time is None or stop_time is None: + raise ValueError( + "start_time and stop_time must be provided when exporting by asset." + ) + assets_and_time_range = AssetsAndTimeRange(asset_ids=asset_ids) assets_and_time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) assets_and_time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) request.assets_and_time_range.CopyFrom(assets_and_time_range) else: - assert start_time is not None - assert stop_time is not None + if start_time is None or stop_time is None: + raise ValueError( + "start_time and stop_time must be provided when exporting by time range." + ) time_range = TimeRange() time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index f36b7d669..b4161e19f 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -59,7 +59,7 @@ def __init__(self, sift_client: SiftClient): sift_client: The Sift client to use. 
""" super().__init__(sift_client) - self._low_level_client = ExportsLowLevelClient(grpc_client=self._sift_client.grpc_client) + self._low_level_client = ExportsLowLevelClient(grpc_client=self.client.grpc_client) async def export_by_run( self, @@ -129,8 +129,7 @@ async def export_by_run( split_export_by_run=split_export_by_run, ) - job = await self.client.async_.jobs.get(job_id=job_id) - return self._apply_client_to_instance(job) + return await self.client.async_.jobs.get(job_id=job_id) async def export_by_asset( self, @@ -197,8 +196,7 @@ async def export_by_asset( split_export_by_run=split_export_by_run, ) - job = await self.client.async_.jobs.get(job_id=job_id) - return self._apply_client_to_instance(job) + return await self.client.async_.jobs.get(job_id=job_id) async def export_by_time_range( self, @@ -266,8 +264,7 @@ async def export_by_time_range( split_export_by_run=split_export_by_run, ) - job = await self.client.async_.jobs.get(job_id=job_id) - return self._apply_client_to_instance(job) + return await self.client.async_.jobs.get(job_id=job_id) async def wait_until_complete( self, From fc844e83f1c09bca6e4f45bcfe729cf62b9f6380 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Mon, 16 Mar 2026 15:32:34 -0700 Subject: [PATCH 19/53] python(refactor): removed use_legacy_format as a possible field for exports --- .../sift_client/_internal/low_level_wrappers/exports.py | 4 +--- python/lib/sift_client/resources/exports.py | 9 --------- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/exports.py b/python/lib/sift_client/_internal/low_level_wrappers/exports.py index 680680772..f7005d2dd 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/exports.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/exports.py @@ -83,7 +83,6 @@ async def export_data( stop_time: datetime | None = None, channel_ids: list[str] | None = None, calculated_channels: list[CalculatedChannel | 
CalculatedChannelCreate] | None = None, - use_legacy_format: bool = False, simplify_channel_names: bool = False, combine_runs: bool = False, split_export_by_asset: bool = False, @@ -105,7 +104,6 @@ async def export_data( stop_time: Optional stop time for the export. channel_ids: Optional list of channel IDs to include. calculated_channels: Optional calculated channel objects to include. - use_legacy_format: Use legacy channel name display format. simplify_channel_names: Simplify channel names if unique. combine_runs: Combine identical channels across runs. split_export_by_asset: Split export by asset. @@ -117,7 +115,7 @@ async def export_data( request = ExportDataRequest( output_format=output_format.value, export_options=ExportOptions( - use_legacy_format=use_legacy_format, + use_legacy_format=False, simplify_channel_names=simplify_channel_names, combine_runs=combine_runs, split_export_by_asset=split_export_by_asset, diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index b4161e19f..c4b6e51a0 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -70,7 +70,6 @@ async def export_by_run( stop_time: datetime | None = None, channels: list[str | Channel] | None = None, calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, - use_legacy_format: bool = False, simplify_channel_names: bool = False, combine_runs: bool = False, split_export_by_asset: bool = False, @@ -92,7 +91,6 @@ async def export_by_run( stop_time: Optional stop time to narrow the export within the run(s). channels: Optional list of Channel objects or channel IDs to include. If omitted, all channels are exported. calculated_channels: Optional calculated channels to include in the export. Accepts existing CalculatedChannel objects or CalculatedChannelCreate definitions. - use_legacy_format: Use legacy channel name display format: ``channel.name (assetName=... runName=... 
runId=...)``. simplify_channel_names: Remove text preceding last period in channel names, only if the resulting simplified name is unique. combine_runs: Identical channels within the same asset across multiple runs will be combined into a single column. split_export_by_asset: Split each asset into a separate file, with asset name removed from channel name display. @@ -122,7 +120,6 @@ async def export_by_run( stop_time=stop_time, channel_ids=channel_ids, calculated_channels=calculated_channels, - use_legacy_format=use_legacy_format, simplify_channel_names=simplify_channel_names, combine_runs=combine_runs, split_export_by_asset=split_export_by_asset, @@ -140,7 +137,6 @@ async def export_by_asset( output_format: ExportOutputFormat, channels: list[str | Channel] | None = None, calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, - use_legacy_format: bool = False, simplify_channel_names: bool = False, combine_runs: bool = False, split_export_by_asset: bool = False, @@ -161,7 +157,6 @@ async def export_by_asset( output_format: The file format for the export (CSV, Parquet, or Sun/WinPlot). channels: Optional list of Channel objects or channel IDs to include. If omitted, all channels are exported. calculated_channels: Optional calculated channels to include in the export. Accepts existing CalculatedChannel objects or CalculatedChannelCreate definitions. - use_legacy_format: Use legacy channel name display format: ``channel.name (assetName=... runName=... runId=...)``. simplify_channel_names: Remove text preceding last period in channel names, only if the resulting simplified name is unique. combine_runs: Identical channels within the same asset across multiple runs will be combined into a single column. split_export_by_asset: Split each asset into a separate file, with asset name removed from channel name display. 
@@ -189,7 +184,6 @@ async def export_by_asset( output_format=output_format, channel_ids=channel_ids, calculated_channels=calculated_channels, - use_legacy_format=use_legacy_format, simplify_channel_names=simplify_channel_names, combine_runs=combine_runs, split_export_by_asset=split_export_by_asset, @@ -206,7 +200,6 @@ async def export_by_time_range( output_format: ExportOutputFormat, channels: list[str | Channel] | None = None, calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, - use_legacy_format: bool = False, simplify_channel_names: bool = False, combine_runs: bool = False, split_export_by_asset: bool = False, @@ -227,7 +220,6 @@ async def export_by_time_range( output_format: The file format for the export (CSV, Parquet, or Sun/WinPlot). channels: List of Channel objects or channel IDs to include in the export. calculated_channels: Calculated channels to include in the export. Accepts existing CalculatedChannel objects or CalculatedChannelCreate definitions. - use_legacy_format: Use legacy channel name display format: ``channel.name (assetName=... runName=... runId=...)``. simplify_channel_names: Remove text preceding last period in channel names, only if the resulting simplified name is unique. combine_runs: Identical channels within the same asset across multiple runs will be combined into a single column. split_export_by_asset: Split each asset into a separate file, with asset name removed from channel name display. 
@@ -257,7 +249,6 @@ async def export_by_time_range( output_format=output_format, channel_ids=channel_ids, calculated_channels=calculated_channels, - use_legacy_format=use_legacy_format, simplify_channel_names=simplify_channel_names, combine_runs=combine_runs, split_export_by_asset=split_export_by_asset, From 6787655d777d0f7802792ec4d696b5e8efc99e7e Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Tue, 17 Mar 2026 13:57:44 -0700 Subject: [PATCH 20/53] python(feat): return file path from export job, update test --- .../_tests/resources/test_exports.py | 94 ++++++++++++++----- python/lib/sift_client/resources/exports.py | 73 ++++++++++++-- .../resources/sync_stubs/__init__.pyi | 29 +++--- 3 files changed, 150 insertions(+), 46 deletions(-) diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index 00bdde579..59c917d14 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -3,7 +3,7 @@ These tests validate the usage of the ExportsAPIAsync including: - Correct delegation to the low-level client for all three export methods - Domain object resolution (Run -> run_id, Asset -> asset_id, Channel -> channel_id) -- Job lifecycle: export methods return Job, wait_until_complete returns URL +- Job lifecycle: export methods return Job, wait_until_complete returns list of file paths - Input validation and error handling """ @@ -127,11 +127,9 @@ async def test_delegates_to_low_level_and_returns_job(self, exports_api): start_time=START, stop_time=STOP, channels=["ch-1"], - use_legacy_format=True, simplify_channel_names=True, combine_runs=True, split_export_by_asset=True, - split_export_by_run=True, ) assert isinstance(job, MagicMock) @@ -142,11 +140,10 @@ async def test_delegates_to_low_level_and_returns_job(self, exports_api): stop_time=STOP, channel_ids=["ch-1"], calculated_channels=None, - use_legacy_format=True, 
simplify_channel_names=True, combine_runs=True, split_export_by_asset=True, - split_export_by_run=True, + split_export_by_run=False, ) @pytest.mark.asyncio @@ -164,7 +161,6 @@ async def test_minimal_args(self, exports_api): stop_time=None, channel_ids=[], calculated_channels=None, - use_legacy_format=False, simplify_channel_names=False, combine_runs=False, split_export_by_asset=False, @@ -257,6 +253,17 @@ async def test_stop_without_start_raises(self, exports_api): stop_time=STOP, ) + @pytest.mark.asyncio + async def test_combine_runs_with_split_by_run_raises(self, exports_api): + """Test that enabling both combine_runs and split_export_by_run raises ValueError.""" + with pytest.raises(ValueError, match="combine_runs.*split_export_by_run"): + await exports_api.export_by_run( + runs=["run-1"], + output_format=ExportOutputFormat.CSV, + combine_runs=True, + split_export_by_run=True, + ) + class TestExportByAsset: """Tests for the export_by_asset method.""" @@ -279,7 +286,6 @@ async def test_delegates_to_low_level_and_returns_job(self, exports_api): output_format=ExportOutputFormat.CSV, channel_ids=["ch-1", "ch-2"], calculated_channels=None, - use_legacy_format=False, simplify_channel_names=False, combine_runs=False, split_export_by_asset=False, @@ -335,6 +341,19 @@ async def test_start_after_stop_raises(self, exports_api): output_format=ExportOutputFormat.CSV, ) + @pytest.mark.asyncio + async def test_combine_runs_with_split_by_run_raises(self, exports_api): + """Test that enabling both combine_runs and split_export_by_run raises ValueError.""" + with pytest.raises(ValueError, match="combine_runs.*split_export_by_run"): + await exports_api.export_by_asset( + assets=["asset-1"], + start_time=START, + stop_time=STOP, + output_format=ExportOutputFormat.CSV, + combine_runs=True, + split_export_by_run=True, + ) + class TestExportByTimeRange: """Tests for the export_by_time_range method.""" @@ -354,7 +373,6 @@ async def test_delegates_to_low_level_with_channels(self, 
exports_api): output_format=ExportOutputFormat.SUN, channel_ids=["ch-1"], calculated_channels=None, - use_legacy_format=False, simplify_channel_names=False, combine_runs=False, split_export_by_asset=False, @@ -396,26 +414,45 @@ async def test_start_after_stop_raises(self, exports_api): channels=["ch-1"], ) + @pytest.mark.asyncio + async def test_combine_runs_with_split_by_run_raises(self, exports_api): + """Test that enabling both combine_runs and split_export_by_run raises ValueError.""" + with pytest.raises(ValueError, match="combine_runs.*split_export_by_run"): + await exports_api.export_by_time_range( + start_time=START, + stop_time=STOP, + output_format=ExportOutputFormat.CSV, + channels=["ch-1"], + combine_runs=True, + split_export_by_run=True, + ) + class TestWaitUntilComplete: """Tests for the wait_until_complete method.""" @pytest.mark.asyncio - async def test_returns_download_url_on_success(self, exports_api, mock_client): - """Test that a finished job returns the download URL.""" + async def test_returns_file_paths_on_success(self, exports_api, mock_client, tmp_path): + """Test that a finished job downloads files and returns their paths.""" mock_job = MagicMock(spec=Job) mock_job._id_or_error = "job-123" - mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=mock_job) - exports_api._low_level_client.get_download_url = AsyncMock( - return_value="https://download.test/export.zip" - ) completed_job = MagicMock(spec=Job) completed_job.job_status = JobStatus.FINISHED mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=completed_job) + exports_api._low_level_client.get_download_url = AsyncMock( + return_value="https://download.test/export.zip" + ) + + fake_file = tmp_path / "data.csv" + fake_file.write_text("col1,col2\n1,2") - url = await exports_api.wait_until_complete(job=mock_job) + mock_loop = MagicMock() + mock_loop.run_in_executor = AsyncMock(return_value=None) - assert url == "https://download.test/export.zip" + with 
patch("asyncio.get_event_loop", return_value=mock_loop): + result = await exports_api.wait_until_complete(job=mock_job, output_dir=tmp_path) + + assert result == [fake_file] mock_client.async_.jobs.wait_until_complete.assert_awaited_once_with( job="job-123", polling_interval_secs=5, timeout_secs=None ) @@ -424,7 +461,7 @@ async def test_returns_download_url_on_success(self, exports_api, mock_client): ) @pytest.mark.asyncio - async def test_accepts_job_id_string(self, exports_api, mock_client): + async def test_accepts_job_id_string(self, exports_api, mock_client, tmp_path): """Test that a raw job_id string is accepted.""" completed_job = MagicMock(spec=Job) completed_job.job_status = JobStatus.FINISHED @@ -433,15 +470,22 @@ async def test_accepts_job_id_string(self, exports_api, mock_client): return_value="https://download.test/export.zip" ) - url = await exports_api.wait_until_complete(job="job-456") + fake_file = tmp_path / "data.csv" + fake_file.write_text("col1,col2\n1,2") + + mock_loop = MagicMock() + mock_loop.run_in_executor = AsyncMock(return_value=None) - assert url == "https://download.test/export.zip" + with patch("asyncio.get_event_loop", return_value=mock_loop): + result = await exports_api.wait_until_complete(job="job-456", output_dir=tmp_path) + + assert result == [fake_file] mock_client.async_.jobs.wait_until_complete.assert_awaited_once_with( job="job-456", polling_interval_secs=5, timeout_secs=None ) @pytest.mark.asyncio - async def test_custom_polling_and_timeout(self, exports_api, mock_client): + async def test_custom_polling_and_timeout(self, exports_api, mock_client, tmp_path): """Test that polling_interval_secs and timeout_secs are forwarded.""" mock_job = MagicMock(spec=Job) mock_job._id_or_error = "job-123" @@ -453,9 +497,13 @@ async def test_custom_polling_and_timeout(self, exports_api, mock_client): return_value="https://download.test/export.zip" ) - await exports_api.wait_until_complete( - job=mock_job, polling_interval_secs=1, 
timeout_secs=10 - ) + mock_loop = MagicMock() + mock_loop.run_in_executor = AsyncMock(return_value=None) + + with patch("asyncio.get_event_loop", return_value=mock_loop): + await exports_api.wait_until_complete( + job=mock_job, polling_interval_secs=1, timeout_secs=10, output_dir=tmp_path + ) mock_client.async_.jobs.wait_until_complete.assert_awaited_once_with( job="job-123", polling_interval_secs=1, timeout_secs=10 diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index c4b6e51a0..821901f10 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -1,7 +1,13 @@ from __future__ import annotations +import asyncio +import tempfile +import zipfile +from pathlib import Path from typing import TYPE_CHECKING +import requests + from sift_client._internal.low_level_wrappers.exports import ExportsLowLevelClient from sift_client.resources._base import ResourceBase from sift_client.sift_types.asset import Asset @@ -27,7 +33,7 @@ class ExportsAPIAsync(ResourceBase): - ``export_by_time_range`` - Export data within a time range (requires channels or calculated_channels). Each method initiates the export and returns a Job handle. Use ``wait_until_complete`` - to poll the job and retrieve the download URL. + to poll the job, download the export, and get the paths to the extracted files. 
Example:: @@ -39,7 +45,7 @@ class ExportsAPIAsync(ResourceBase): runs=[run], output_format=ExportOutputFormat.CSV, ) - url = await client.async_.exports.wait_until_complete(job=job) + files = await client.async_.exports.wait_until_complete(job=job) # Export by asset with time range asset = await client.async_.assets.get(asset_id="asset-id-1") @@ -49,7 +55,7 @@ class ExportsAPIAsync(ResourceBase): stop_time=stop, output_format=ExportOutputFormat.CSV, ) - url = await client.async_.exports.wait_until_complete(job=job) + files = await client.async_.exports.wait_until_complete(job=job) """ def __init__(self, sift_client: SiftClient): @@ -107,6 +113,12 @@ async def export_by_run( raise ValueError("'start_time' and 'stop_time' must both be provided or both omitted.") if start_time and stop_time and start_time >= stop_time: raise ValueError("'start_time' must be before 'stop_time'.") + if combine_runs and split_export_by_run: + raise ValueError( + "'combine_runs' cannot be used with 'split_export_by_run'. " + "Combining merges identical channels across runs into a single column, " + "which is not possible when each run is split into a separate file." + ) run_ids = [r._id_or_error if isinstance(r, Run) else r for r in runs] channel_ids = ( @@ -171,6 +183,12 @@ async def export_by_asset( raise ValueError("'assets' must not contain empty or null values.") if start_time >= stop_time: raise ValueError("'start_time' must be before 'stop_time'.") + if combine_runs and split_export_by_run: + raise ValueError( + "'combine_runs' cannot be used with 'split_export_by_run'. " + "Combining merges identical channels across runs into a single column, " + "which is not possible when each run is split into a separate file." 
+ ) asset_ids = [a._id_or_error if isinstance(a, Asset) else a for a in assets] channel_ids = ( @@ -238,6 +256,12 @@ async def export_by_time_range( ) if start_time >= stop_time: raise ValueError("'start_time' must be before 'stop_time'.") + if combine_runs and split_export_by_run: + raise ValueError( + "'combine_runs' cannot be used with 'split_export_by_run'. " + "Combining merges identical channels across runs into a single column, " + "which is not possible when each run is split into a separate file." + ) channel_ids = ( [c._id_or_error if isinstance(c, Channel) else c for c in channels] if channels else [] @@ -263,19 +287,22 @@ async def wait_until_complete( job: Job | str, polling_interval_secs: int = 5, timeout_secs: int | None = None, - ) -> str: - """Wait for an export job to complete and return the download URL. + output_dir: str | Path | None = None, + ) -> list[Path]: + """Wait for an export job to complete and download the exported files. Polls the job status at the given interval until the job is FINISHED, - FAILED, or CANCELLED. + FAILED, or CANCELLED, then downloads and extracts the exported data files. Args: job: The export Job or job ID to wait for. polling_interval_secs: Seconds between status polls. Defaults to 5. timeout_secs: Maximum seconds to wait. If None, polls indefinitely. + output_dir: Directory to save the extracted files. If omitted, a + temporary directory is created automatically. Returns: - A presigned download URL for the exported zip file. + List of paths to the extracted data files. Raises: RuntimeError: If the export job fails or is cancelled. @@ -296,9 +323,37 @@ async def wait_until_complete( and completed_job.job_status_details.error_message ): raise RuntimeError( - f"Export job '{job_id}' failed: {completed_job.job_status_details.error_message}" + f"Export job '{job_id}' failed. 
{completed_job.job_status_details.error_message}" ) raise RuntimeError(f"Export job '{job_id}' failed.") if completed_job.job_status == JobStatus.CANCELLED: raise RuntimeError(f"Export job '{job_id}' was cancelled.") - return await self._low_level_client.get_download_url(job_id=job_id) + + presigned_url = await self._low_level_client.get_download_url(job_id=job_id) + output_dir = ( + Path(output_dir) + if output_dir is not None + else Path(tempfile.mkdtemp(prefix="sift_export_")) + ) + zip_path = output_dir / f"{job_id}.zip" + + # Run the synchronous request in a thread pool to avoid blocking the event loop + loop = asyncio.get_event_loop() + await loop.run_in_executor( + None, ExportsAPIAsync._download_and_extract, presigned_url, zip_path, output_dir + ) + + return [f for f in output_dir.iterdir() if f.is_file()] + + @staticmethod + def _download_and_extract(url: str, zip_path: Path, output_dir: Path) -> None: + output_dir.mkdir(parents=True, exist_ok=True) + with requests.get(url=url, stream=True) as response: + response.raise_for_status() + with zip_path.open("wb") as file: + for chunk in response.iter_content(chunk_size=4194304): # 4 MiB + if chunk: + file.write(chunk) + with zipfile.ZipFile(zip_path, "r") as zip_file: + zip_file.extractall(output_dir) + zip_path.unlink() diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi index 9df53f9e9..5b044ddfc 100644 --- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi +++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi @@ -550,7 +550,7 @@ class ExportsAPI: - ``export_by_time_range`` - Export data within a time range (requires channels or calculated_channels). Each method initiates the export and returns a Job handle. Use ``wait_until_complete`` - to poll the job and retrieve the download URL. + to poll the job, download the export, and get the paths to the extracted files. 
Example:: @@ -562,7 +562,7 @@ class ExportsAPI: runs=[run], output_format=ExportOutputFormat.CSV, ) - url = await client.async_.exports.wait_until_complete(job=job) + files = await client.async_.exports.wait_until_complete(job=job) # Export by asset with time range asset = await client.async_.assets.get(asset_id="asset-id-1") @@ -572,7 +572,7 @@ class ExportsAPI: stop_time=stop, output_format=ExportOutputFormat.CSV, ) - url = await client.async_.exports.wait_until_complete(job=job) + files = await client.async_.exports.wait_until_complete(job=job) """ def __init__(self, sift_client: SiftClient): @@ -593,7 +593,6 @@ class ExportsAPI: output_format: ExportOutputFormat, channels: list[str | Channel] | None = None, calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, - use_legacy_format: bool = False, simplify_channel_names: bool = False, combine_runs: bool = False, split_export_by_asset: bool = False, @@ -614,7 +613,6 @@ class ExportsAPI: output_format: The file format for the export (CSV, Parquet, or Sun/WinPlot). channels: Optional list of Channel objects or channel IDs to include. If omitted, all channels are exported. calculated_channels: Optional calculated channels to include in the export. Accepts existing CalculatedChannel objects or CalculatedChannelCreate definitions. - use_legacy_format: Use legacy channel name display format: ``channel.name (assetName=... runName=... runId=...)``. simplify_channel_names: Remove text preceding last period in channel names, only if the resulting simplified name is unique. combine_runs: Identical channels within the same asset across multiple runs will be combined into a single column. split_export_by_asset: Split each asset into a separate file, with asset name removed from channel name display. 
@@ -634,7 +632,6 @@ class ExportsAPI: stop_time: datetime | None = None, channels: list[str | Channel] | None = None, calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, - use_legacy_format: bool = False, simplify_channel_names: bool = False, combine_runs: bool = False, split_export_by_asset: bool = False, @@ -656,7 +653,6 @@ class ExportsAPI: stop_time: Optional stop time to narrow the export within the run(s). channels: Optional list of Channel objects or channel IDs to include. If omitted, all channels are exported. calculated_channels: Optional calculated channels to include in the export. Accepts existing CalculatedChannel objects or CalculatedChannelCreate definitions. - use_legacy_format: Use legacy channel name display format: ``channel.name (assetName=... runName=... runId=...)``. simplify_channel_names: Remove text preceding last period in channel names, only if the resulting simplified name is unique. combine_runs: Identical channels within the same asset across multiple runs will be combined into a single column. split_export_by_asset: Split each asset into a separate file, with asset name removed from channel name display. @@ -675,7 +671,6 @@ class ExportsAPI: output_format: ExportOutputFormat, channels: list[str | Channel] | None = None, calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, - use_legacy_format: bool = False, simplify_channel_names: bool = False, combine_runs: bool = False, split_export_by_asset: bool = False, @@ -696,7 +691,6 @@ class ExportsAPI: output_format: The file format for the export (CSV, Parquet, or Sun/WinPlot). channels: List of Channel objects or channel IDs to include in the export. calculated_channels: Calculated channels to include in the export. Accepts existing CalculatedChannel objects or CalculatedChannelCreate definitions. - use_legacy_format: Use legacy channel name display format: ``channel.name (assetName=... runName=... runId=...)``. 
simplify_channel_names: Remove text preceding last period in channel names, only if the resulting simplified name is unique. combine_runs: Identical channels within the same asset across multiple runs will be combined into a single column. split_export_by_asset: Split each asset into a separate file, with asset name removed from channel name display. @@ -711,20 +705,27 @@ class ExportsAPI: ... def wait_until_complete( - self, *, job: Job | str, polling_interval_secs: int = 5, timeout_secs: int | None = None - ) -> str: - """Wait for an export job to complete and return the download URL. + self, + *, + job: Job | str, + polling_interval_secs: int = 5, + timeout_secs: int | None = None, + output_dir: str | Path | None = None, + ) -> list[Path]: + """Wait for an export job to complete and download the exported files. Polls the job status at the given interval until the job is FINISHED, - FAILED, or CANCELLED. + FAILED, or CANCELLED, then downloads and extracts the exported data files. Args: job: The export Job or job ID to wait for. polling_interval_secs: Seconds between status polls. Defaults to 5. timeout_secs: Maximum seconds to wait. If None, polls indefinitely. + output_dir: Directory to save the extracted files. If omitted, a + temporary directory is created automatically. Returns: - A presigned download URL for the exported zip file. + List of paths to the extracted data files. Raises: RuntimeError: If the export job fails or is cancelled. 
From 753cd67f2dcecf481d432e77f9ff02344f820732 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Tue, 17 Mar 2026 15:32:26 -0700 Subject: [PATCH 21/53] python(fix): resolve calc channel name-based identifiers to uuid for exports --- .../_tests/resources/test_exports.py | 457 +++++++++++------- python/lib/sift_client/resources/exports.py | 70 ++- 2 files changed, 352 insertions(+), 175 deletions(-) diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index 59c917d14..53c2f55a4 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -17,7 +17,11 @@ from sift_client._internal.low_level_wrappers.exports import _build_calc_channel_configs from sift_client.resources.exports import ExportsAPIAsync from sift_client.sift_types.asset import Asset -from sift_client.sift_types.calculated_channel import CalculatedChannelCreate, ChannelReference +from sift_client.sift_types.calculated_channel import ( + CalculatedChannel, + CalculatedChannelCreate, + ChannelReference, +) from sift_client.sift_types.channel import Channel from sift_client.sift_types.export import ExportOutputFormat from sift_client.sift_types.job import DataExportStatusDetails, Job, JobStatus @@ -34,6 +38,8 @@ def mock_client(): client.grpc_client = MagicMock() client.async_ = MagicMock() client.async_.jobs = MagicMock() + client.async_.channels = MagicMock() + client.async_.channels.find = AsyncMock(return_value=None) return client @@ -52,9 +58,7 @@ def exports_api(mock_client, mock_job): with patch("sift_client.resources.exports.ExportsLowLevelClient", autospec=True) as mock_ll: api = ExportsAPIAsync(mock_client) api._low_level_client = mock_ll.return_value - # Default: low-level export_data returns a job_id api._low_level_client.export_data = AsyncMock(return_value="job-123") - # Default: jobs.get returns a mock Job mock_client.async_.jobs.get = 
AsyncMock(return_value=mock_job) return api @@ -82,19 +86,66 @@ def sample_calc_channels(): ] +@pytest.fixture +def mock_calculated_channel(): + """Create a mock fetched CalculatedChannel with name-based channel_identifier.""" + cc = MagicMock(spec=CalculatedChannel) + cc.name = "my_calc" + cc.expression = "$1 + 10" + cc.units = "m/s" + cc.asset_ids = ["asset-1"] + cc.channel_references = [ + ChannelReference(channel_reference="$1", channel_identifier="sensor.velocity"), + ] + return cc + + +@pytest.fixture +def mock_resolved_channel(): + """Create a mock Channel returned by channels.find during resolution.""" + ch = MagicMock(spec=Channel) + ch._id_or_error = "resolved-ch-uuid" + return ch + + +@pytest.fixture +def completed_export_setup(exports_api, mock_client, tmp_path): + """Set up mocks for a successful wait_until_complete call. + + Returns a dict with the exports_api, mock_client, tmp_path, and fake_file. + """ + completed_job = MagicMock(spec=Job) + completed_job.job_status = JobStatus.FINISHED + mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=completed_job) + exports_api._low_level_client.get_download_url = AsyncMock( + return_value="https://download.test/export.zip" + ) + + fake_file = tmp_path / "data.csv" + fake_file.write_text("col1,col2\n1,2") + + mock_loop = MagicMock() + mock_loop.run_in_executor = AsyncMock(return_value=None) + + return { + "api": exports_api, + "client": mock_client, + "tmp_path": tmp_path, + "fake_file": fake_file, + "mock_loop": mock_loop, + } + + class TestBuildCalcChannelConfigs: """Tests for the _build_calc_channel_configs helper in the low-level client.""" - def test_returns_empty_list_for_none(self): - """Test that None input returns an empty list.""" - assert _build_calc_channel_configs(None) == [] + @pytest.mark.parametrize("input_val", [None, []]) + def test_returns_empty_list_for_empty_input(self, input_val): + """Test that None or empty list returns an empty list.""" + assert 
_build_calc_channel_configs(input_val) == [] - def test_returns_empty_list_for_empty_list(self): - """Test that an empty list returns an empty list.""" - assert _build_calc_channel_configs([]) == [] - - def test_converts_to_proto(self, sample_calc_channels): - """Test converting CalculatedChannelCreate objects to proto CalculatedChannelConfig messages.""" + def test_converts_create_objects_to_proto(self, sample_calc_channels): + """Test converting CalculatedChannelCreate objects to proto CalculatedChannelConfig.""" result = _build_calc_channel_configs(sample_calc_channels) assert len(result) == 2 @@ -111,6 +162,31 @@ def test_converts_to_proto(self, sample_calc_channels): assert second.units == "" # proto default for unset optional string assert len(second.channel_references) == 2 + def test_converts_existing_calculated_channel_to_proto(self): + """Test converting an existing CalculatedChannel (full model) to proto. + + Exercises the else-branch that reads from 'channel_references' + instead of 'expression_channel_references'. 
+ """ + mock_cc = MagicMock(spec=CalculatedChannel) + mock_cc.name = "derived_speed" + mock_cc.expression = "$1 / $2" + mock_cc.channel_references = [ + ChannelReference(channel_reference="$1", channel_identifier="ch-distance"), + ChannelReference(channel_reference="$2", channel_identifier="ch-time"), + ] + mock_cc.units = "m/s" + + result = _build_calc_channel_configs([mock_cc]) + assert len(result) == 1 + config = result[0] + assert config.name == "derived_speed" + assert config.expression == "$1 / $2" + assert config.units == "m/s" + assert len(config.channel_references) == 2 + assert config.channel_references[0].channel_identifier == "ch-distance" + assert config.channel_references[1].channel_identifier == "ch-time" + class TestExportsAPIAsync: """Tests for the ExportsAPIAsync high-level client.""" @@ -210,32 +286,18 @@ async def test_resolves_channel_objects_to_ids(self, exports_api): @pytest.mark.asyncio async def test_empty_runs_raises(self, exports_api): - """Test that an empty runs list raises ValueError.""" with pytest.raises(ValueError, match="runs"): await exports_api.export_by_run(runs=[], output_format=ExportOutputFormat.CSV) @pytest.mark.asyncio async def test_null_run_raises(self, exports_api): - """Test that a runs list containing an empty string raises ValueError.""" with pytest.raises(ValueError, match="empty or null"): await exports_api.export_by_run( runs=["", "run-1"], output_format=ExportOutputFormat.CSV ) - @pytest.mark.asyncio - async def test_start_after_stop_raises(self, exports_api): - """Test that start_time >= stop_time raises ValueError.""" - with pytest.raises(ValueError, match="start_time"): - await exports_api.export_by_run( - runs=["run-1"], - output_format=ExportOutputFormat.CSV, - start_time=STOP, - stop_time=START, - ) - @pytest.mark.asyncio async def test_start_without_stop_raises(self, exports_api): - """Test that providing start_time without stop_time raises ValueError.""" with pytest.raises(ValueError, match="both be 
provided or both omitted"): await exports_api.export_by_run( runs=["run-1"], @@ -245,7 +307,6 @@ async def test_start_without_stop_raises(self, exports_api): @pytest.mark.asyncio async def test_stop_without_start_raises(self, exports_api): - """Test that providing stop_time without start_time raises ValueError.""" with pytest.raises(ValueError, match="both be provided or both omitted"): await exports_api.export_by_run( runs=["run-1"], @@ -253,23 +314,11 @@ async def test_stop_without_start_raises(self, exports_api): stop_time=STOP, ) - @pytest.mark.asyncio - async def test_combine_runs_with_split_by_run_raises(self, exports_api): - """Test that enabling both combine_runs and split_export_by_run raises ValueError.""" - with pytest.raises(ValueError, match="combine_runs.*split_export_by_run"): - await exports_api.export_by_run( - runs=["run-1"], - output_format=ExportOutputFormat.CSV, - combine_runs=True, - split_export_by_run=True, - ) - class TestExportByAsset: """Tests for the export_by_asset method.""" @pytest.mark.asyncio async def test_delegates_to_low_level_and_returns_job(self, exports_api): - """Test that export_by_asset passes correct args to low-level and returns a Job.""" job = await exports_api.export_by_asset( assets=["asset-1"], start_time=START, @@ -292,9 +341,22 @@ async def test_delegates_to_low_level_and_returns_job(self, exports_api): split_export_by_run=False, ) + @pytest.mark.asyncio + async def test_with_calculated_channels(self, exports_api, sample_calc_channels): + await exports_api.export_by_asset( + assets=["asset-1"], + start_time=START, + stop_time=STOP, + output_format=ExportOutputFormat.CSV, + calculated_channels=sample_calc_channels, + ) + + call_kwargs = exports_api._low_level_client.export_data.call_args.kwargs + assert call_kwargs["calculated_channels"] == sample_calc_channels + assert call_kwargs["channel_ids"] == [] + @pytest.mark.asyncio async def test_resolves_asset_objects_to_ids(self, exports_api): - """Test that Asset domain 
objects are resolved to their IDs.""" mock_asset = MagicMock(spec=Asset) mock_asset._id_or_error = "resolved-asset-id" @@ -310,7 +372,6 @@ async def test_resolves_asset_objects_to_ids(self, exports_api): @pytest.mark.asyncio async def test_empty_assets_raises(self, exports_api): - """Test that an empty assets list raises ValueError.""" with pytest.raises(ValueError, match="assets"): await exports_api.export_by_asset( assets=[], @@ -321,7 +382,6 @@ async def test_empty_assets_raises(self, exports_api): @pytest.mark.asyncio async def test_null_asset_raises(self, exports_api): - """Test that an assets list containing an empty string raises ValueError.""" with pytest.raises(ValueError, match="empty or null"): await exports_api.export_by_asset( assets=[""], @@ -330,36 +390,11 @@ async def test_null_asset_raises(self, exports_api): output_format=ExportOutputFormat.CSV, ) - @pytest.mark.asyncio - async def test_start_after_stop_raises(self, exports_api): - """Test that start_time >= stop_time raises ValueError.""" - with pytest.raises(ValueError, match="start_time"): - await exports_api.export_by_asset( - assets=["asset-1"], - start_time=STOP, - stop_time=START, - output_format=ExportOutputFormat.CSV, - ) - - @pytest.mark.asyncio - async def test_combine_runs_with_split_by_run_raises(self, exports_api): - """Test that enabling both combine_runs and split_export_by_run raises ValueError.""" - with pytest.raises(ValueError, match="combine_runs.*split_export_by_run"): - await exports_api.export_by_asset( - assets=["asset-1"], - start_time=START, - stop_time=STOP, - output_format=ExportOutputFormat.CSV, - combine_runs=True, - split_export_by_run=True, - ) - class TestExportByTimeRange: """Tests for the export_by_time_range method.""" @pytest.mark.asyncio async def test_delegates_to_low_level_with_channels(self, exports_api): - """Test that export_by_time_range passes correct args to low-level.""" await exports_api.export_by_time_range( start_time=START, stop_time=STOP, @@ 
-383,7 +418,6 @@ async def test_delegates_to_low_level_with_channels(self, exports_api): async def test_delegates_to_low_level_with_calc_channels( self, exports_api, sample_calc_channels ): - """Test that calculated channels are passed through to the low-level client.""" await exports_api.export_by_time_range( start_time=START, stop_time=STOP, @@ -397,157 +431,240 @@ async def test_delegates_to_low_level_with_calc_channels( @pytest.mark.asyncio async def test_no_channels_raises(self, exports_api): - """Test that omitting both channels and calculated_channels raises ValueError.""" with pytest.raises(ValueError, match=r"channels.*calculated_channels"): await exports_api.export_by_time_range( start_time=START, stop_time=STOP, output_format=ExportOutputFormat.CSV ) + class TestSharedValidation: + """Validation rules shared across all three export methods.""" + @pytest.mark.asyncio - async def test_start_after_stop_raises(self, exports_api): - """Test that start_time >= stop_time raises ValueError.""" + @pytest.mark.parametrize( + ("method", "kwargs"), + [ + ("export_by_run", {"runs": ["r-1"], "output_format": ExportOutputFormat.CSV}), + ("export_by_asset", {"assets": ["a-1"], "output_format": ExportOutputFormat.CSV}), + ( + "export_by_time_range", + {"output_format": ExportOutputFormat.CSV, "channels": ["ch-1"]}, + ), + ], + ) + async def test_start_after_stop_raises(self, exports_api, method, kwargs): with pytest.raises(ValueError, match="start_time"): - await exports_api.export_by_time_range( - start_time=STOP, - stop_time=START, - output_format=ExportOutputFormat.CSV, - channels=["ch-1"], - ) + await getattr(exports_api, method)(start_time=STOP, stop_time=START, **kwargs) @pytest.mark.asyncio - async def test_combine_runs_with_split_by_run_raises(self, exports_api): - """Test that enabling both combine_runs and split_export_by_run raises ValueError.""" + @pytest.mark.parametrize( + ("method", "kwargs"), + [ + ("export_by_run", {"runs": ["r-1"], "output_format": 
ExportOutputFormat.CSV}), + ( + "export_by_asset", + { + "assets": ["a-1"], + "output_format": ExportOutputFormat.CSV, + "start_time": START, + "stop_time": STOP, + }, + ), + ( + "export_by_time_range", + { + "output_format": ExportOutputFormat.CSV, + "channels": ["ch-1"], + "start_time": START, + "stop_time": STOP, + }, + ), + ], + ) + async def test_combine_runs_with_split_by_run_raises(self, exports_api, method, kwargs): with pytest.raises(ValueError, match="combine_runs.*split_export_by_run"): - await exports_api.export_by_time_range( - start_time=START, - stop_time=STOP, - output_format=ExportOutputFormat.CSV, - channels=["ch-1"], - combine_runs=True, - split_export_by_run=True, + await getattr(exports_api, method)( + combine_runs=True, split_export_by_run=True, **kwargs ) - class TestWaitUntilComplete: - """Tests for the wait_until_complete method.""" + class TestResolveCalculatedChannels: + """Tests for the _resolve_calculated_channels helper.""" @pytest.mark.asyncio - async def test_returns_file_paths_on_success(self, exports_api, mock_client, tmp_path): - """Test that a finished job downloads files and returns their paths.""" - mock_job = MagicMock(spec=Job) - mock_job._id_or_error = "job-123" + async def test_passes_through_none(self, exports_api): + result = await exports_api._resolve_calculated_channels(None) + assert result is None - completed_job = MagicMock(spec=Job) - completed_job.job_status = JobStatus.FINISHED - mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=completed_job) - exports_api._low_level_client.get_download_url = AsyncMock( - return_value="https://download.test/export.zip" + @pytest.mark.asyncio + async def test_preserves_objects_when_identifiers_not_found( + self, exports_api, sample_calc_channels + ): + """channels.find returns None → identifiers assumed to be UUIDs, objects preserved.""" + result = await exports_api._resolve_calculated_channels(sample_calc_channels) + assert result[0] is sample_calc_channels[0] 
+ assert result[1] is sample_calc_channels[1] + + @pytest.mark.asyncio + async def test_resolves_fetched_calculated_channel( + self, exports_api, mock_client, mock_calculated_channel, mock_resolved_channel + ): + """A fetched CalculatedChannel's name-based identifier is resolved to a UUID.""" + mock_client.async_.channels.find = AsyncMock(return_value=mock_resolved_channel) + + result = await exports_api._resolve_calculated_channels([mock_calculated_channel]) + + assert len(result) == 1 + resolved = result[0] + assert isinstance(resolved, CalculatedChannelCreate) + assert resolved.name == "my_calc" + assert resolved.expression == "$1 + 10" + assert resolved.units == "m/s" + assert ( + resolved.expression_channel_references[0].channel_identifier == "resolved-ch-uuid" + ) + mock_client.async_.channels.find.assert_awaited_once_with( + name="sensor.velocity", assets=["asset-1"] ) - fake_file = tmp_path / "data.csv" - fake_file.write_text("col1,col2\n1,2") + @pytest.mark.asyncio + async def test_keeps_identifier_when_not_found(self, exports_api, mock_calculated_channel): + """channels.find returns None → identifier kept as-is, original object preserved.""" + mock_calculated_channel.channel_references = [ + ChannelReference( + channel_reference="$1", + channel_identifier="d8e64798-ad6f-41b8-b830-7e009806f365", + ), + ] + + result = await exports_api._resolve_calculated_channels([mock_calculated_channel]) + assert result[0] is mock_calculated_channel - mock_loop = MagicMock() - mock_loop.run_in_executor = AsyncMock(return_value=None) + @pytest.mark.asyncio + async def test_resolves_create_object_with_name_identifier( + self, exports_api, mock_client, mock_resolved_channel + ): + """A CalculatedChannelCreate with a name-based identifier gets resolved.""" + mock_resolved_channel._id_or_error = "d8e64798-ad6f-41b8-b830-7e009806f365" + mock_client.async_.channels.find = AsyncMock(return_value=mock_resolved_channel) + + inline_cc = CalculatedChannelCreate( + 
name="inline_calc", + expression="$1 + 30", + expression_channel_references=[ + ChannelReference( + channel_reference="$1", channel_identifier="DiningRoomLight.rssi" + ), + ], + ) - with patch("asyncio.get_event_loop", return_value=mock_loop): - result = await exports_api.wait_until_complete(job=mock_job, output_dir=tmp_path) + result = await exports_api._resolve_calculated_channels([inline_cc]) - assert result == [fake_file] - mock_client.async_.jobs.wait_until_complete.assert_awaited_once_with( - job="job-123", polling_interval_secs=5, timeout_secs=None + resolved = result[0] + assert isinstance(resolved, CalculatedChannelCreate) + assert ( + resolved.expression_channel_references[0].channel_identifier + == "d8e64798-ad6f-41b8-b830-7e009806f365" ) - exports_api._low_level_client.get_download_url.assert_awaited_once_with( - job_id="job-123" + mock_client.async_.channels.find.assert_awaited_once_with( + name="DiningRoomLight.rssi", assets=None ) @pytest.mark.asyncio - async def test_accepts_job_id_string(self, exports_api, mock_client, tmp_path): - """Test that a raw job_id string is accepted.""" - completed_job = MagicMock(spec=Job) - completed_job.job_status = JobStatus.FINISHED - mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=completed_job) - exports_api._low_level_client.get_download_url = AsyncMock( - return_value="https://download.test/export.zip" - ) - - fake_file = tmp_path / "data.csv" - fake_file.write_text("col1,col2\n1,2") + async def test_mixed_create_and_existing( + self, + exports_api, + mock_client, + sample_calc_channels, + mock_calculated_channel, + mock_resolved_channel, + ): + """Mix of CalculatedChannelCreate and CalculatedChannel resolves only names.""" + mock_calculated_channel.channel_references = [ + ChannelReference(channel_reference="$1", channel_identifier="sensor.rpm"), + ] + mock_resolved_channel._id_or_error = "rpm-uuid" - mock_loop = MagicMock() - mock_loop.run_in_executor = AsyncMock(return_value=None) + async 
def find_side_effect(name, assets=None): + return mock_resolved_channel if name == "sensor.rpm" else None - with patch("asyncio.get_event_loop", return_value=mock_loop): - result = await exports_api.wait_until_complete(job="job-456", output_dir=tmp_path) + mock_client.async_.channels.find = AsyncMock(side_effect=find_side_effect) - assert result == [fake_file] - mock_client.async_.jobs.wait_until_complete.assert_awaited_once_with( - job="job-456", polling_interval_secs=5, timeout_secs=None + result = await exports_api._resolve_calculated_channels( + [sample_calc_channels[0], mock_calculated_channel] ) + assert len(result) == 2 + assert result[0] is sample_calc_channels[0] + assert isinstance(result[1], CalculatedChannelCreate) + assert result[1].expression_channel_references[0].channel_identifier == "rpm-uuid" + + class TestWaitUntilComplete: + """Tests for the wait_until_complete method.""" + @pytest.mark.asyncio - async def test_custom_polling_and_timeout(self, exports_api, mock_client, tmp_path): - """Test that polling_interval_secs and timeout_secs are forwarded.""" + async def test_returns_file_paths_on_success(self, completed_export_setup): + s = completed_export_setup mock_job = MagicMock(spec=Job) mock_job._id_or_error = "job-123" - completed_job = MagicMock(spec=Job) - completed_job.job_status = JobStatus.FINISHED - mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=completed_job) - exports_api._low_level_client.get_download_url = AsyncMock( - return_value="https://download.test/export.zip" - ) - - mock_loop = MagicMock() - mock_loop.run_in_executor = AsyncMock(return_value=None) - - with patch("asyncio.get_event_loop", return_value=mock_loop): - await exports_api.wait_until_complete( - job=mock_job, polling_interval_secs=1, timeout_secs=10, output_dir=tmp_path - ) + with patch("asyncio.get_event_loop", return_value=s["mock_loop"]): + result = await s["api"].wait_until_complete(job=mock_job, output_dir=s["tmp_path"]) - 
mock_client.async_.jobs.wait_until_complete.assert_awaited_once_with( - job="job-123", polling_interval_secs=1, timeout_secs=10 + assert result == [s["fake_file"]] + s["client"].async_.jobs.wait_until_complete.assert_awaited_once_with( + job="job-123", polling_interval_secs=5, timeout_secs=None ) + s["api"]._low_level_client.get_download_url.assert_awaited_once_with(job_id="job-123") @pytest.mark.asyncio - async def test_failed_job_raises_with_reason(self, exports_api, mock_client): - """Test that a failed job raises RuntimeError with the error message.""" - mock_job = MagicMock(spec=Job) - mock_job._id_or_error = "job-fail" + async def test_accepts_job_id_string(self, completed_export_setup): + s = completed_export_setup - completed_job = MagicMock(spec=Job) - completed_job.job_status = JobStatus.FAILED - completed_job.job_status_details = DataExportStatusDetails( - error_message="out of memory" - ) - mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=completed_job) + with patch("asyncio.get_event_loop", return_value=s["mock_loop"]): + result = await s["api"].wait_until_complete(job="job-456", output_dir=s["tmp_path"]) - with pytest.raises(RuntimeError, match=r"failed.*out of memory"): - await exports_api.wait_until_complete(job=mock_job) + assert result == [s["fake_file"]] + s["client"].async_.jobs.wait_until_complete.assert_awaited_once_with( + job="job-456", polling_interval_secs=5, timeout_secs=None + ) @pytest.mark.asyncio - async def test_failed_job_raises_without_reason(self, exports_api, mock_client): - """Test that a failed job with no status details still raises RuntimeError.""" + async def test_custom_polling_and_timeout(self, completed_export_setup): + s = completed_export_setup mock_job = MagicMock(spec=Job) - mock_job._id_or_error = "job-fail" + mock_job._id_or_error = "job-123" - completed_job = MagicMock(spec=Job) - completed_job.job_status = JobStatus.FAILED - completed_job.job_status_details = None - 
mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=completed_job) + with patch("asyncio.get_event_loop", return_value=s["mock_loop"]): + await s["api"].wait_until_complete( + job=mock_job, polling_interval_secs=1, timeout_secs=10, output_dir=s["tmp_path"] + ) - with pytest.raises(RuntimeError, match="failed"): - await exports_api.wait_until_complete(job=mock_job) + s["client"].async_.jobs.wait_until_complete.assert_awaited_once_with( + job="job-123", polling_interval_secs=1, timeout_secs=10 + ) @pytest.mark.asyncio - async def test_cancelled_job_raises(self, exports_api, mock_client): - """Test that a cancelled job raises RuntimeError.""" + @pytest.mark.parametrize( + ("status", "details", "match"), + [ + ( + JobStatus.FAILED, + DataExportStatusDetails(error_message="out of memory"), + r"failed.*out of memory", + ), + (JobStatus.FAILED, None, "failed"), + (JobStatus.CANCELLED, None, "cancelled"), + ], + ) + async def test_terminal_job_status_raises( + self, exports_api, mock_client, status, details, match + ): mock_job = MagicMock(spec=Job) - mock_job._id_or_error = "job-cancel" + mock_job._id_or_error = "job-err" completed_job = MagicMock(spec=Job) - completed_job.job_status = JobStatus.CANCELLED + completed_job.job_status = status + completed_job.job_status_details = details mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=completed_job) - with pytest.raises(RuntimeError, match="cancelled"): + with pytest.raises(RuntimeError, match=match): await exports_api.wait_until_complete(job=mock_job) diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index 821901f10..f95bb8319 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -11,7 +11,8 @@ from sift_client._internal.low_level_wrappers.exports import ExportsLowLevelClient from sift_client.resources._base import ResourceBase from sift_client.sift_types.asset import Asset -from 
sift_client.sift_types.channel import Channel +from sift_client.sift_types.calculated_channel import CalculatedChannel, CalculatedChannelCreate +from sift_client.sift_types.channel import Channel, ChannelReference from sift_client.sift_types.export import ExportOutputFormat # noqa: TC001 from sift_client.sift_types.job import Job from sift_client.sift_types.run import Run @@ -20,7 +21,6 @@ from datetime import datetime from sift_client.client import SiftClient - from sift_client.sift_types.calculated_channel import CalculatedChannel, CalculatedChannelCreate class ExportsAPIAsync(ResourceBase): @@ -67,6 +67,63 @@ def __init__(self, sift_client: SiftClient): super().__init__(sift_client) self._low_level_client = ExportsLowLevelClient(grpc_client=self.client.grpc_client) + async def _resolve_calculated_channels( + self, + calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None, + ) -> list[CalculatedChannel | CalculatedChannelCreate] | None: + """Resolve channel references in calculated channel objects for export. + + The export API requires channel UUIDs in calculated channel references, but + channel references may contain channel names instead. This method attempts + to resolve each identifier by looking it up as a channel name via the channels + API. If no channel is found by that name, the identifier is assumed to already + be a UUID and is kept as-is. 
+ """ + if not calculated_channels: + return calculated_channels + + resolved: list[CalculatedChannel | CalculatedChannelCreate] = [] + for cc in calculated_channels: + if isinstance(cc, CalculatedChannelCreate): + refs = cc.expression_channel_references or [] + asset_ids = cc.asset_ids + else: + refs = cc.channel_references + asset_ids = cc.asset_ids + + resolved_refs: list[ChannelReference] = [] + any_resolved = False + for ref in refs: + channel = await self.client.async_.channels.find( + name=ref.channel_identifier, + assets=asset_ids, + ) + if channel is not None: + resolved_refs.append( + ChannelReference( + channel_reference=ref.channel_reference, + channel_identifier=channel._id_or_error, + ) + ) + any_resolved = True + else: + # Assume already a UUID + resolved_refs.append(ref) + + if any_resolved: + resolved.append( + CalculatedChannelCreate( + name=cc.name, + expression=cc.expression, + expression_channel_references=resolved_refs, + units=cc.units if cc.units else None, + ) + ) + else: + resolved.append(cc) + + return resolved + async def export_by_run( self, *, @@ -124,6 +181,7 @@ async def export_by_run( channel_ids = ( [c._id_or_error if isinstance(c, Channel) else c for c in channels] if channels else [] ) + resolved_calc_channels = await self._resolve_calculated_channels(calculated_channels) job_id = await self._low_level_client.export_data( run_ids=run_ids, @@ -131,7 +189,7 @@ async def export_by_run( start_time=start_time, stop_time=stop_time, channel_ids=channel_ids, - calculated_channels=calculated_channels, + calculated_channels=resolved_calc_channels, simplify_channel_names=simplify_channel_names, combine_runs=combine_runs, split_export_by_asset=split_export_by_asset, @@ -194,6 +252,7 @@ async def export_by_asset( channel_ids = ( [c._id_or_error if isinstance(c, Channel) else c for c in channels] if channels else [] ) + resolved_calc_channels = await self._resolve_calculated_channels(calculated_channels) job_id = await 
self._low_level_client.export_data( asset_ids=asset_ids, @@ -201,7 +260,7 @@ async def export_by_asset( stop_time=stop_time, output_format=output_format, channel_ids=channel_ids, - calculated_channels=calculated_channels, + calculated_channels=resolved_calc_channels, simplify_channel_names=simplify_channel_names, combine_runs=combine_runs, split_export_by_asset=split_export_by_asset, @@ -266,13 +325,14 @@ async def export_by_time_range( channel_ids = ( [c._id_or_error if isinstance(c, Channel) else c for c in channels] if channels else [] ) + resolved_calc_channels = await self._resolve_calculated_channels(calculated_channels) job_id = await self._low_level_client.export_data( start_time=start_time, stop_time=stop_time, output_format=output_format, channel_ids=channel_ids, - calculated_channels=calculated_channels, + calculated_channels=resolved_calc_channels, simplify_channel_names=simplify_channel_names, combine_runs=combine_runs, split_export_by_asset=split_export_by_asset, From efcbf84fe2528e17f9eee196ffab5cc11e6b1965 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Tue, 17 Mar 2026 15:41:09 -0700 Subject: [PATCH 22/53] python(fix): add assertions for export tests --- python/lib/sift_client/_tests/resources/test_exports.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index 53c2f55a4..b454ab893 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -518,9 +518,8 @@ async def test_resolves_fetched_calculated_channel( assert resolved.name == "my_calc" assert resolved.expression == "$1 + 10" assert resolved.units == "m/s" - assert ( - resolved.expression_channel_references[0].channel_identifier == "resolved-ch-uuid" - ) + assert resolved.expression_channel_references is not None + assert resolved.expression_channel_references[0].channel_identifier == 
"resolved-ch-uuid" mock_client.async_.channels.find.assert_awaited_once_with( name="sensor.velocity", assets=["asset-1"] ) @@ -560,6 +559,7 @@ async def test_resolves_create_object_with_name_identifier( resolved = result[0] assert isinstance(resolved, CalculatedChannelCreate) + assert resolved.expression_channel_references is not None assert ( resolved.expression_channel_references[0].channel_identifier == "d8e64798-ad6f-41b8-b830-7e009806f365" From 0f853c74546c281521fa3fd8e2ee74f6e19153fb Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Tue, 17 Mar 2026 15:42:33 -0700 Subject: [PATCH 23/53] linting --- python/lib/sift_client/_tests/resources/test_exports.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index b454ab893..dfdbf895c 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -519,7 +519,9 @@ async def test_resolves_fetched_calculated_channel( assert resolved.expression == "$1 + 10" assert resolved.units == "m/s" assert resolved.expression_channel_references is not None - assert resolved.expression_channel_references[0].channel_identifier == "resolved-ch-uuid" + assert ( + resolved.expression_channel_references[0].channel_identifier == "resolved-ch-uuid" + ) mock_client.async_.channels.find.assert_awaited_once_with( name="sensor.velocity", assets=["asset-1"] ) From 66e29ca66f8471bd668a412ebbdf463bfd08cab2 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Tue, 17 Mar 2026 16:46:26 -0700 Subject: [PATCH 24/53] python(refactor): simplified resolve_calculated_channel logic --- python/lib/sift_client/resources/exports.py | 57 ++++++++------------- 1 file changed, 22 insertions(+), 35 deletions(-) diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index f95bb8319..8c4410b9d 100644 --- 
a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -71,57 +71,44 @@ async def _resolve_calculated_channels( self, calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None, ) -> list[CalculatedChannel | CalculatedChannelCreate] | None: - """Resolve channel references in calculated channel objects for export. + """Resolve channel reference identifiers from names to UUIDs. - The export API requires channel UUIDs in calculated channel references, but - channel references may contain channel names instead. This method attempts - to resolve each identifier by looking it up as a channel name via the channels - API. If no channel is found by that name, the identifier is assumed to already - be a UUID and is kept as-is. + For each channel reference, looks up the identifier as a channel name. + If found, replaces it with the channel's UUID. If not found, assumes + the identifier is already a UUID and keeps it as-is. """ if not calculated_channels: return calculated_channels resolved: list[CalculatedChannel | CalculatedChannelCreate] = [] for cc in calculated_channels: - if isinstance(cc, CalculatedChannelCreate): - refs = cc.expression_channel_references or [] - asset_ids = cc.asset_ids - else: - refs = cc.channel_references - asset_ids = cc.asset_ids + refs = ( + cc.expression_channel_references + if isinstance(cc, CalculatedChannelCreate) + else cc.channel_references + ) resolved_refs: list[ChannelReference] = [] - any_resolved = False for ref in refs: channel = await self.client.async_.channels.find( name=ref.channel_identifier, - assets=asset_ids, + assets=cc.asset_ids, ) if channel is not None: - resolved_refs.append( - ChannelReference( - channel_reference=ref.channel_reference, - channel_identifier=channel._id_or_error, - ) - ) - any_resolved = True - else: - # Assume already a UUID - resolved_refs.append(ref) - - if any_resolved: - resolved.append( - CalculatedChannelCreate( - name=cc.name, - 
expression=cc.expression, - expression_channel_references=resolved_refs, - units=cc.units if cc.units else None, + ref = ChannelReference( + channel_reference=ref.channel_reference, + channel_identifier=channel._id_or_error, ) + resolved_refs.append(ref) + + resolved.append( + CalculatedChannelCreate( + name=cc.name, + expression=cc.expression, + expression_channel_references=resolved_refs, + units=cc.units or None, ) - else: - resolved.append(cc) - + ) return resolved async def export_by_run( From 60a1942fc4eb60eb200c7ddb40ff65a2001c4464 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Tue, 17 Mar 2026 16:54:06 -0700 Subject: [PATCH 25/53] mypy fix --- python/lib/sift_client/resources/exports.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index 8c4410b9d..2239eb124 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -83,7 +83,7 @@ async def _resolve_calculated_channels( resolved: list[CalculatedChannel | CalculatedChannelCreate] = [] for cc in calculated_channels: refs = ( - cc.expression_channel_references + (cc.expression_channel_references or []) if isinstance(cc, CalculatedChannelCreate) else cc.channel_references ) From bda329b26b2680152bb1b7c55da9e3acbc6da68e Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Tue, 17 Mar 2026 17:01:05 -0700 Subject: [PATCH 26/53] python(fix): update export tests to check equality --- .../sift_client/_tests/resources/test_exports.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index dfdbf895c..0ff44f96c 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -500,8 +500,8 @@ async def test_preserves_objects_when_identifiers_not_found( ): 
"""channels.find returns None → identifiers assumed to be UUIDs, objects preserved.""" result = await exports_api._resolve_calculated_channels(sample_calc_channels) - assert result[0] is sample_calc_channels[0] - assert result[1] is sample_calc_channels[1] + assert result[0] == sample_calc_channels[0] + assert result[1] == sample_calc_channels[1] @pytest.mark.asyncio async def test_resolves_fetched_calculated_channel( @@ -528,7 +528,7 @@ async def test_resolves_fetched_calculated_channel( @pytest.mark.asyncio async def test_keeps_identifier_when_not_found(self, exports_api, mock_calculated_channel): - """channels.find returns None → identifier kept as-is, original object preserved.""" + """channels.find returns None → identifier kept as-is.""" mock_calculated_channel.channel_references = [ ChannelReference( channel_reference="$1", @@ -537,7 +537,12 @@ async def test_keeps_identifier_when_not_found(self, exports_api, mock_calculate ] result = await exports_api._resolve_calculated_channels([mock_calculated_channel]) - assert result[0] is mock_calculated_channel + resolved = result[0] + assert isinstance(resolved, CalculatedChannelCreate) + assert ( + resolved.expression_channel_references[0].channel_identifier + == "d8e64798-ad6f-41b8-b830-7e009806f365" + ) @pytest.mark.asyncio async def test_resolves_create_object_with_name_identifier( @@ -595,7 +600,7 @@ async def find_side_effect(name, assets=None): ) assert len(result) == 2 - assert result[0] is sample_calc_channels[0] + assert result[0] == sample_calc_channels[0] assert isinstance(result[1], CalculatedChannelCreate) assert result[1].expression_channel_references[0].channel_identifier == "rpm-uuid" From f42704a59fa186ca7ba9714cfa2e67b5413e70ca Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Tue, 17 Mar 2026 17:06:49 -0700 Subject: [PATCH 27/53] pyright --- python/lib/sift_client/_tests/resources/test_exports.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index 0ff44f96c..2b128ee56 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -539,6 +539,7 @@ async def test_keeps_identifier_when_not_found(self, exports_api, mock_calculate result = await exports_api._resolve_calculated_channels([mock_calculated_channel]) resolved = result[0] assert isinstance(resolved, CalculatedChannelCreate) + assert resolved.expression_channel_references is not None assert ( resolved.expression_channel_references[0].channel_identifier == "d8e64798-ad6f-41b8-b830-7e009806f365" From 3d7d7c0f1938c5663be6f19ca56ad89292a3ec43 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Wed, 18 Mar 2026 10:57:46 -0700 Subject: [PATCH 28/53] python(fix): return paths to extracted_files in wait_until_complete --- python/lib/sift_client/_tests/resources/test_exports.py | 2 +- python/lib/sift_client/resources/exports.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index 2b128ee56..669422e3e 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -125,7 +125,7 @@ def completed_export_setup(exports_api, mock_client, tmp_path): fake_file.write_text("col1,col2\n1,2") mock_loop = MagicMock() - mock_loop.run_in_executor = AsyncMock(return_value=None) + mock_loop.run_in_executor = AsyncMock(return_value=[fake_file]) return { "api": exports_api, diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index 2239eb124..18c56542e 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -386,14 +386,14 @@ async def wait_until_complete( # Run the synchronous request 
in a thread pool to avoid blocking the event loop loop = asyncio.get_event_loop() - await loop.run_in_executor( + extracted_files = await loop.run_in_executor( None, ExportsAPIAsync._download_and_extract, presigned_url, zip_path, output_dir ) - return [f for f in output_dir.iterdir() if f.is_file()] + return extracted_files @staticmethod - def _download_and_extract(url: str, zip_path: Path, output_dir: Path) -> None: + def _download_and_extract(url: str, zip_path: Path, output_dir: Path) -> list[Path]: output_dir.mkdir(parents=True, exist_ok=True) with requests.get(url=url, stream=True) as response: response.raise_for_status() @@ -402,5 +402,7 @@ def _download_and_extract(url: str, zip_path: Path, output_dir: Path) -> None: if chunk: file.write(chunk) with zipfile.ZipFile(zip_path, "r") as zip_file: + names = zip_file.namelist() zip_file.extractall(output_dir) zip_path.unlink() + return [output_dir / name for name in names if not name.endswith("/")] From 7c1e13c2e9de15504763dd2c9e7aa90a616d7187 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Wed, 18 Mar 2026 14:56:12 -0700 Subject: [PATCH 29/53] python(refactor): rename wait_until_complete to download_when_complete and move download helper to util --- python/lib/sift_client/resources/exports.py | 23 ++--------- python/lib/sift_client/util/download.py | 43 +++++++++++++++++++++ 2 files changed, 46 insertions(+), 20 deletions(-) create mode 100644 python/lib/sift_client/util/download.py diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index 18c56542e..c3b3204d7 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -2,12 +2,9 @@ import asyncio import tempfile -import zipfile from pathlib import Path from typing import TYPE_CHECKING -import requests - from sift_client._internal.low_level_wrappers.exports import ExportsLowLevelClient from sift_client.resources._base import ResourceBase from 
sift_client.sift_types.asset import Asset @@ -16,6 +13,7 @@ from sift_client.sift_types.export import ExportOutputFormat # noqa: TC001 from sift_client.sift_types.job import Job from sift_client.sift_types.run import Run +from sift_client.util.download import download_and_extract_zip if TYPE_CHECKING: from datetime import datetime @@ -328,7 +326,7 @@ async def export_by_time_range( return await self.client.async_.jobs.get(job_id=job_id) - async def wait_until_complete( + async def download_when_complete( self, *, job: Job | str, @@ -387,22 +385,7 @@ async def wait_until_complete( # Run the synchronous request in a thread pool to avoid blocking the event loop loop = asyncio.get_event_loop() extracted_files = await loop.run_in_executor( - None, ExportsAPIAsync._download_and_extract, presigned_url, zip_path, output_dir + None, download_and_extract_zip, presigned_url, zip_path, output_dir ) return extracted_files - - @staticmethod - def _download_and_extract(url: str, zip_path: Path, output_dir: Path) -> list[Path]: - output_dir.mkdir(parents=True, exist_ok=True) - with requests.get(url=url, stream=True) as response: - response.raise_for_status() - with zip_path.open("wb") as file: - for chunk in response.iter_content(chunk_size=4194304): # 4 MiB - if chunk: - file.write(chunk) - with zipfile.ZipFile(zip_path, "r") as zip_file: - names = zip_file.namelist() - zip_file.extractall(output_dir) - zip_path.unlink() - return [output_dir / name for name in names if not name.endswith("/")] diff --git a/python/lib/sift_client/util/download.py b/python/lib/sift_client/util/download.py new file mode 100644 index 000000000..5612df6fa --- /dev/null +++ b/python/lib/sift_client/util/download.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +import zipfile +from pathlib import Path +from typing import TYPE_CHECKING + +import requests + +if TYPE_CHECKING: + from pathlib import Path + + +def download_and_extract_zip(url: str, zip_path: Path, output_dir: Path) -> list[Path]: + 
"""Download a zip file from a URL and extract its contents. + + Downloads the file in streaming 4 MiB chunks, extracts all contents + to the output directory, then removes the zip file. + + Args: + url: The URL to download the zip file from. + zip_path: Path where the zip file will be temporarily saved. + output_dir: Directory to extract the zip contents into. + Created if it doesn't exist. + + Returns: + List of paths to the extracted files (excludes directories). + + Raises: + requests.HTTPError: If the download request fails. + zipfile.BadZipFile: If the downloaded file is not a valid zip. + """ + output_dir.mkdir(parents=True, exist_ok=True) + with requests.get(url=url, stream=True) as response: + response.raise_for_status() + with zip_path.open("wb") as file: + for chunk in response.iter_content(chunk_size=4194304): # 4 MiB + if chunk: + file.write(chunk) + with zipfile.ZipFile(zip_path, "r") as zip_file: + names = zip_file.namelist() + zip_file.extractall(output_dir) + zip_path.unlink() + return [output_dir / name for name in names if not name.endswith("/")] From 2455fd100bc1a812d770980458b9f061ff9aef8b Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Wed, 18 Mar 2026 15:03:40 -0700 Subject: [PATCH 30/53] python(refactor): rename to wait_and_download and regenerate stubs --- python/lib/sift_client/resources/exports.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index c3b3204d7..98ed066b5 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -30,7 +30,7 @@ class ExportsAPIAsync(ResourceBase): - ``export_by_asset`` - Export data from one or more assets within a time range. - ``export_by_time_range`` - Export data within a time range (requires channels or calculated_channels). - Each method initiates the export and returns a Job handle. 
Use ``wait_until_complete`` + Each method initiates the export and returns a Job handle. Use ``wait_and_download`` to poll the job, download the export, and get the paths to the extracted files. Example:: @@ -43,7 +43,7 @@ class ExportsAPIAsync(ResourceBase): runs=[run], output_format=ExportOutputFormat.CSV, ) - files = await client.async_.exports.wait_until_complete(job=job) + files = await client.async_.exports.wait_and_download(job=job) # Export by asset with time range asset = await client.async_.assets.get(asset_id="asset-id-1") @@ -53,7 +53,7 @@ class ExportsAPIAsync(ResourceBase): stop_time=stop, output_format=ExportOutputFormat.CSV, ) - files = await client.async_.exports.wait_until_complete(job=job) + files = await client.async_.exports.wait_and_download(job=job) """ def __init__(self, sift_client: SiftClient): @@ -126,7 +126,7 @@ async def export_by_run( """Export data scoped by one or more runs. Initiates the export on the server and returns a Job handle. Use - ``wait_until_complete`` to poll for completion and get the download URL. + ``wait_and_download`` to poll for completion and get the download URL. If no start_time/stop_time are provided, the full time range of each run is used. If no channels or calculated_channels are provided, all channels from @@ -200,7 +200,7 @@ async def export_by_asset( """Export data scoped by one or more assets within a time range. Initiates the export on the server and returns a Job handle. Use - ``wait_until_complete`` to poll for completion and get the download URL. + ``wait_and_download`` to poll for completion and get the download URL. Both start_time and stop_time are required. If no channels or calculated_channels are provided, all channels from the assets are included. @@ -270,7 +270,7 @@ async def export_by_time_range( """Export data within a time range. Initiates the export on the server and returns a Job handle. Use - ``wait_until_complete`` to poll for completion and get the download URL. 
+ ``wait_and_download`` to poll for completion and get the download URL. Both start_time and stop_time are required. At least one of channels or calculated_channels **must** be provided to scope the data, since there @@ -326,7 +326,7 @@ async def export_by_time_range( return await self.client.async_.jobs.get(job_id=job_id) - async def download_when_complete( + async def wait_and_download( self, *, job: Job | str, From 837d8fa922a7d7f5a098c0a3587a45f63a1b310f Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Wed, 18 Mar 2026 15:18:17 -0700 Subject: [PATCH 31/53] python(refactor): revert _grpc to grpc and updated stubs --- .../_internal/{_grpc => grpc}/__init__.py | 2 +- .../_async_interceptors/__init__.py | 0 .../{_grpc => grpc}/_async_interceptors/base.py | 0 .../_async_interceptors/metadata.py | 2 +- .../{_grpc => grpc}/_interceptors/__init__.py | 0 .../{_grpc => grpc}/_interceptors/base.py | 0 .../{_grpc => grpc}/_interceptors/context.py | 0 .../{_grpc => grpc}/_interceptors/metadata.py | 4 ++-- .../_internal/{_grpc => grpc}/_retry.py | 0 .../_internal/{_grpc => grpc}/keepalive.py | 0 .../server_interceptors/__init__.py | 0 .../server_interceptors/server_interceptor.py | 0 .../_internal/{_grpc => grpc}/transport.py | 12 ++++++------ .../_internal/{_grpc => grpc}/transport_test.py | 4 ++-- python/lib/sift_client/_internal/rest.py | 2 +- .../sift_client/resources/sync_stubs/__init__.pyi | 14 +++++++------- python/lib/sift_client/transport/grpc_transport.py | 2 +- 17 files changed, 21 insertions(+), 21 deletions(-) rename python/lib/sift_client/_internal/{_grpc => grpc}/__init__.py (82%) rename python/lib/sift_client/_internal/{_grpc => grpc}/_async_interceptors/__init__.py (100%) rename python/lib/sift_client/_internal/{_grpc => grpc}/_async_interceptors/base.py (100%) rename python/lib/sift_client/_internal/{_grpc => grpc}/_async_interceptors/metadata.py (91%) rename python/lib/sift_client/_internal/{_grpc => grpc}/_interceptors/__init__.py (100%) rename 
python/lib/sift_client/_internal/{_grpc => grpc}/_interceptors/base.py (100%) rename python/lib/sift_client/_internal/{_grpc => grpc}/_interceptors/context.py (100%) rename python/lib/sift_client/_internal/{_grpc => grpc}/_interceptors/metadata.py (81%) rename python/lib/sift_client/_internal/{_grpc => grpc}/_retry.py (100%) rename python/lib/sift_client/_internal/{_grpc => grpc}/keepalive.py (100%) rename python/lib/sift_client/_internal/{_grpc => grpc}/server_interceptors/__init__.py (100%) rename python/lib/sift_client/_internal/{_grpc => grpc}/server_interceptors/server_interceptor.py (100%) rename python/lib/sift_client/_internal/{_grpc => grpc}/transport.py (94%) rename python/lib/sift_client/_internal/{_grpc => grpc}/transport_test.py (97%) diff --git a/python/lib/sift_client/_internal/_grpc/__init__.py b/python/lib/sift_client/_internal/grpc/__init__.py similarity index 82% rename from python/lib/sift_client/_internal/_grpc/__init__.py rename to python/lib/sift_client/_internal/grpc/__init__.py index 85a0bd3ce..738259dc8 100644 --- a/python/lib/sift_client/_internal/_grpc/__init__.py +++ b/python/lib/sift_client/_internal/grpc/__init__.py @@ -4,7 +4,7 @@ Example of establishing a connection to Sift's gRPC APi: ```python -from sift_client._internal._grpc.transport import SiftChannelConfig, use_sift_channel +from sift_client._internal.grpc.transport import SiftChannelConfig, use_sift_channel # Be sure not to include the url scheme i.e. 'https://' in the uri. 
sift_channel_config = SiftChannelConfig(uri=SIFT_BASE_URI, apikey=SIFT_API_KEY) diff --git a/python/lib/sift_client/_internal/_grpc/_async_interceptors/__init__.py b/python/lib/sift_client/_internal/grpc/_async_interceptors/__init__.py similarity index 100% rename from python/lib/sift_client/_internal/_grpc/_async_interceptors/__init__.py rename to python/lib/sift_client/_internal/grpc/_async_interceptors/__init__.py diff --git a/python/lib/sift_client/_internal/_grpc/_async_interceptors/base.py b/python/lib/sift_client/_internal/grpc/_async_interceptors/base.py similarity index 100% rename from python/lib/sift_client/_internal/_grpc/_async_interceptors/base.py rename to python/lib/sift_client/_internal/grpc/_async_interceptors/base.py diff --git a/python/lib/sift_client/_internal/_grpc/_async_interceptors/metadata.py b/python/lib/sift_client/_internal/grpc/_async_interceptors/metadata.py similarity index 91% rename from python/lib/sift_client/_internal/_grpc/_async_interceptors/metadata.py rename to python/lib/sift_client/_internal/grpc/_async_interceptors/metadata.py index 08e601a95..95cc5a925 100644 --- a/python/lib/sift_client/_internal/_grpc/_async_interceptors/metadata.py +++ b/python/lib/sift_client/_internal/grpc/_async_interceptors/metadata.py @@ -4,7 +4,7 @@ from grpc import aio as grpc_aio -from sift_client._internal._grpc._async_interceptors.base import ClientAsyncInterceptor +from sift_client._internal.grpc._async_interceptors.base import ClientAsyncInterceptor Metadata = List[Tuple[str, str]] diff --git a/python/lib/sift_client/_internal/_grpc/_interceptors/__init__.py b/python/lib/sift_client/_internal/grpc/_interceptors/__init__.py similarity index 100% rename from python/lib/sift_client/_internal/_grpc/_interceptors/__init__.py rename to python/lib/sift_client/_internal/grpc/_interceptors/__init__.py diff --git a/python/lib/sift_client/_internal/_grpc/_interceptors/base.py b/python/lib/sift_client/_internal/grpc/_interceptors/base.py similarity 
index 100% rename from python/lib/sift_client/_internal/_grpc/_interceptors/base.py rename to python/lib/sift_client/_internal/grpc/_interceptors/base.py diff --git a/python/lib/sift_client/_internal/_grpc/_interceptors/context.py b/python/lib/sift_client/_internal/grpc/_interceptors/context.py similarity index 100% rename from python/lib/sift_client/_internal/_grpc/_interceptors/context.py rename to python/lib/sift_client/_internal/grpc/_interceptors/context.py diff --git a/python/lib/sift_client/_internal/_grpc/_interceptors/metadata.py b/python/lib/sift_client/_internal/grpc/_interceptors/metadata.py similarity index 81% rename from python/lib/sift_client/_internal/_grpc/_interceptors/metadata.py rename to python/lib/sift_client/_internal/grpc/_interceptors/metadata.py index fdc4fd223..afb5da50c 100644 --- a/python/lib/sift_client/_internal/_grpc/_interceptors/metadata.py +++ b/python/lib/sift_client/_internal/grpc/_interceptors/metadata.py @@ -2,8 +2,8 @@ import grpc -from sift_client._internal._grpc._interceptors.base import ClientInterceptor, Continuation -from sift_client._internal._grpc._interceptors.context import ClientCallDetails +from sift_client._internal.grpc._interceptors.base import ClientInterceptor, Continuation +from sift_client._internal.grpc._interceptors.context import ClientCallDetails Metadata = List[Tuple[str, str]] diff --git a/python/lib/sift_client/_internal/_grpc/_retry.py b/python/lib/sift_client/_internal/grpc/_retry.py similarity index 100% rename from python/lib/sift_client/_internal/_grpc/_retry.py rename to python/lib/sift_client/_internal/grpc/_retry.py diff --git a/python/lib/sift_client/_internal/_grpc/keepalive.py b/python/lib/sift_client/_internal/grpc/keepalive.py similarity index 100% rename from python/lib/sift_client/_internal/_grpc/keepalive.py rename to python/lib/sift_client/_internal/grpc/keepalive.py diff --git a/python/lib/sift_client/_internal/_grpc/server_interceptors/__init__.py 
b/python/lib/sift_client/_internal/grpc/server_interceptors/__init__.py similarity index 100% rename from python/lib/sift_client/_internal/_grpc/server_interceptors/__init__.py rename to python/lib/sift_client/_internal/grpc/server_interceptors/__init__.py diff --git a/python/lib/sift_client/_internal/_grpc/server_interceptors/server_interceptor.py b/python/lib/sift_client/_internal/grpc/server_interceptors/server_interceptor.py similarity index 100% rename from python/lib/sift_client/_internal/_grpc/server_interceptors/server_interceptor.py rename to python/lib/sift_client/_internal/grpc/server_interceptors/server_interceptor.py diff --git a/python/lib/sift_client/_internal/_grpc/transport.py b/python/lib/sift_client/_internal/grpc/transport.py similarity index 94% rename from python/lib/sift_client/_internal/_grpc/transport.py rename to python/lib/sift_client/_internal/grpc/transport.py index 87dc9b3ec..1043245a8 100644 --- a/python/lib/sift_client/_internal/_grpc/transport.py +++ b/python/lib/sift_client/_internal/grpc/transport.py @@ -14,14 +14,14 @@ import grpc.aio as grpc_aio from typing_extensions import NotRequired, TypeAlias -from sift_client._internal._grpc._async_interceptors.metadata import MetadataAsyncInterceptor -from sift_client._internal._grpc._interceptors.metadata import Metadata, MetadataInterceptor +from sift_client._internal.grpc._async_interceptors.metadata import MetadataAsyncInterceptor +from sift_client._internal.grpc._interceptors.metadata import Metadata, MetadataInterceptor if TYPE_CHECKING: - from sift_client._internal._grpc._async_interceptors.base import ClientAsyncInterceptor - from sift_client._internal._grpc._interceptors.base import ClientInterceptor -from sift_client._internal._grpc._retry import RetryPolicy -from sift_client._internal._grpc.keepalive import DEFAULT_KEEPALIVE_CONFIG, KeepaliveConfig + from sift_client._internal.grpc._async_interceptors.base import ClientAsyncInterceptor + from 
sift_client._internal.grpc._interceptors.base import ClientInterceptor +from sift_client._internal.grpc._retry import RetryPolicy +from sift_client._internal.grpc.keepalive import DEFAULT_KEEPALIVE_CONFIG, KeepaliveConfig SiftChannel: TypeAlias = grpc.Channel SiftAsyncChannel: TypeAlias = grpc_aio.Channel diff --git a/python/lib/sift_client/_internal/_grpc/transport_test.py b/python/lib/sift_client/_internal/grpc/transport_test.py similarity index 97% rename from python/lib/sift_client/_internal/_grpc/transport_test.py rename to python/lib/sift_client/_internal/grpc/transport_test.py index 12e03f78a..efccb6b4e 100644 --- a/python/lib/sift_client/_internal/_grpc/transport_test.py +++ b/python/lib/sift_client/_internal/grpc/transport_test.py @@ -15,8 +15,8 @@ add_DataServiceServicer_to_server, ) -from sift_client._internal._grpc.server_interceptors.server_interceptor import ServerInterceptor -from sift_client._internal._grpc.transport import SiftChannelConfig, use_sift_channel +from sift_client._internal.grpc.server_interceptors.server_interceptor import ServerInterceptor +from sift_client._internal.grpc.transport import SiftChannelConfig, use_sift_channel class DataService(DataServiceServicer): diff --git a/python/lib/sift_client/_internal/rest.py b/python/lib/sift_client/_internal/rest.py index 3c89045c9..5f5c954c3 100644 --- a/python/lib/sift_client/_internal/rest.py +++ b/python/lib/sift_client/_internal/rest.py @@ -6,7 +6,7 @@ from typing_extensions import NotRequired from urllib3.util import Retry -from sift_client._internal._grpc.transport import _clean_uri +from sift_client._internal.grpc.transport import _clean_uri _DEFAULT_REST_RETRY = Retry(total=3, status_forcelist=[500, 502, 503, 504], backoff_factor=1) diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi index 5b044ddfc..49c04ee73 100644 --- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi +++ 
b/python/lib/sift_client/resources/sync_stubs/__init__.pyi @@ -549,7 +549,7 @@ class ExportsAPI: - ``export_by_asset`` - Export data from one or more assets within a time range. - ``export_by_time_range`` - Export data within a time range (requires channels or calculated_channels). - Each method initiates the export and returns a Job handle. Use ``wait_until_complete`` + Each method initiates the export and returns a Job handle. Use ``wait_and_download`` to poll the job, download the export, and get the paths to the extracted files. Example:: @@ -562,7 +562,7 @@ class ExportsAPI: runs=[run], output_format=ExportOutputFormat.CSV, ) - files = await client.async_.exports.wait_until_complete(job=job) + files = await client.async_.exports.wait_and_download(job=job) # Export by asset with time range asset = await client.async_.assets.get(asset_id="asset-id-1") @@ -572,7 +572,7 @@ class ExportsAPI: stop_time=stop, output_format=ExportOutputFormat.CSV, ) - files = await client.async_.exports.wait_until_complete(job=job) + files = await client.async_.exports.wait_and_download(job=job) """ def __init__(self, sift_client: SiftClient): @@ -601,7 +601,7 @@ class ExportsAPI: """Export data scoped by one or more assets within a time range. Initiates the export on the server and returns a Job handle. Use - ``wait_until_complete`` to poll for completion and get the download URL. + ``wait_and_download`` to poll for completion and get the download URL. Both start_time and stop_time are required. If no channels or calculated_channels are provided, all channels from the assets are included. @@ -640,7 +640,7 @@ class ExportsAPI: """Export data scoped by one or more runs. Initiates the export on the server and returns a Job handle. Use - ``wait_until_complete`` to poll for completion and get the download URL. + ``wait_and_download`` to poll for completion and get the download URL. If no start_time/stop_time are provided, the full time range of each run is used. 
If no channels or calculated_channels are provided, all channels from @@ -679,7 +679,7 @@ class ExportsAPI: """Export data within a time range. Initiates the export on the server and returns a Job handle. Use - ``wait_until_complete`` to poll for completion and get the download URL. + ``wait_and_download`` to poll for completion and get the download URL. Both start_time and stop_time are required. At least one of channels or calculated_channels **must** be provided to scope the data, since there @@ -704,7 +704,7 @@ class ExportsAPI: """ ... - def wait_until_complete( + def wait_and_download( self, *, job: Job | str, diff --git a/python/lib/sift_client/transport/grpc_transport.py b/python/lib/sift_client/transport/grpc_transport.py index ba728980f..95817a010 100644 --- a/python/lib/sift_client/transport/grpc_transport.py +++ b/python/lib/sift_client/transport/grpc_transport.py @@ -13,7 +13,7 @@ from typing import Any from urllib.parse import urlparse -from sift_client._internal._grpc.transport import ( +from sift_client._internal.grpc.transport import ( SiftChannelConfig, use_sift_async_channel, ) From 535335adcab43b0aa7c16d2ecd05766f27da1bef Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Wed, 18 Mar 2026 15:26:30 -0700 Subject: [PATCH 32/53] python(fix): updated unit tests to use the renamed function --- .../sift_client/_tests/resources/test_exports.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index 669422e3e..74853a39e 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -3,7 +3,7 @@ These tests validate the usage of the ExportsAPIAsync including: - Correct delegation to the low-level client for all three export methods - Domain object resolution (Run -> run_id, Asset -> asset_id, Channel -> channel_id) -- Job lifecycle: export 
methods return Job, wait_until_complete returns list of file paths +- Job lifecycle: export methods return Job, wait_and_download returns list of file paths - Input validation and error handling """ @@ -110,7 +110,7 @@ def mock_resolved_channel(): @pytest.fixture def completed_export_setup(exports_api, mock_client, tmp_path): - """Set up mocks for a successful wait_until_complete call. + """Set up mocks for a successful wait_and_download call. Returns a dict with the exports_api, mock_client, tmp_path, and fake_file. """ @@ -606,7 +606,7 @@ async def find_side_effect(name, assets=None): assert result[1].expression_channel_references[0].channel_identifier == "rpm-uuid" class TestWaitUntilComplete: - """Tests for the wait_until_complete method.""" + """Tests for the wait_and_download method.""" @pytest.mark.asyncio async def test_returns_file_paths_on_success(self, completed_export_setup): @@ -615,7 +615,7 @@ async def test_returns_file_paths_on_success(self, completed_export_setup): mock_job._id_or_error = "job-123" with patch("asyncio.get_event_loop", return_value=s["mock_loop"]): - result = await s["api"].wait_until_complete(job=mock_job, output_dir=s["tmp_path"]) + result = await s["api"].wait_and_download(job=mock_job, output_dir=s["tmp_path"]) assert result == [s["fake_file"]] s["client"].async_.jobs.wait_until_complete.assert_awaited_once_with( @@ -628,7 +628,7 @@ async def test_accepts_job_id_string(self, completed_export_setup): s = completed_export_setup with patch("asyncio.get_event_loop", return_value=s["mock_loop"]): - result = await s["api"].wait_until_complete(job="job-456", output_dir=s["tmp_path"]) + result = await s["api"].wait_and_download(job="job-456", output_dir=s["tmp_path"]) assert result == [s["fake_file"]] s["client"].async_.jobs.wait_until_complete.assert_awaited_once_with( @@ -642,7 +642,7 @@ async def test_custom_polling_and_timeout(self, completed_export_setup): mock_job._id_or_error = "job-123" with patch("asyncio.get_event_loop", 
return_value=s["mock_loop"]): - await s["api"].wait_until_complete( + await s["api"].wait_and_download( job=mock_job, polling_interval_secs=1, timeout_secs=10, output_dir=s["tmp_path"] ) @@ -675,4 +675,4 @@ async def test_terminal_job_status_raises( mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=completed_job) with pytest.raises(RuntimeError, match=match): - await exports_api.wait_until_complete(job=mock_job) + await exports_api.wait_and_download(job=mock_job) From ca7dd630507b3c2e32b251092b9235ffdadcd189 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Wed, 18 Mar 2026 15:52:39 -0700 Subject: [PATCH 33/53] python(refactor): shared _export helper to deduplicate export methods --- .../_tests/resources/test_exports.py | 5 + python/lib/sift_client/resources/exports.py | 111 ++++++++++-------- 2 files changed, 64 insertions(+), 52 deletions(-) diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index 74853a39e..8b7de613e 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -211,6 +211,7 @@ async def test_delegates_to_low_level_and_returns_job(self, exports_api): assert isinstance(job, MagicMock) exports_api._low_level_client.export_data.assert_awaited_once_with( run_ids=["run-1", "run-2"], + asset_ids=None, output_format=ExportOutputFormat.CSV, start_time=START, stop_time=STOP, @@ -232,6 +233,7 @@ async def test_minimal_args(self, exports_api): exports_api._low_level_client.export_data.assert_awaited_once_with( run_ids=["run-1"], + asset_ids=None, output_format=ExportOutputFormat.SUN, start_time=None, stop_time=None, @@ -329,6 +331,7 @@ async def test_delegates_to_low_level_and_returns_job(self, exports_api): assert isinstance(job, MagicMock) exports_api._low_level_client.export_data.assert_awaited_once_with( + run_ids=None, asset_ids=["asset-1"], start_time=START, stop_time=STOP, @@ -403,6 
+406,8 @@ async def test_delegates_to_low_level_with_channels(self, exports_api): ) exports_api._low_level_client.export_data.assert_awaited_once_with( + run_ids=None, + asset_ids=None, start_time=START, stop_time=STOP, output_format=ExportOutputFormat.SUN, diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index 98ed066b5..7e1bf5ad7 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -109,6 +109,56 @@ async def _resolve_calculated_channels( ) return resolved + async def _export( + self, + *, + output_format: ExportOutputFormat, + start_time: datetime | None = None, + stop_time: datetime | None = None, + run_ids: list[str] | None = None, + asset_ids: list[str] | None = None, + channels: list[str | Channel] | None = None, + calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, + simplify_channel_names: bool = False, + combine_runs: bool = False, + split_export_by_asset: bool = False, + split_export_by_run: bool = False, + ) -> Job: + """Shared implementation for all export methods. + + Validates common constraints, resolves channels, calls the low-level + export API, and returns the resulting Job. + """ + if start_time is not None and stop_time is not None and start_time >= stop_time: + raise ValueError("'start_time' must be before 'stop_time'.") + if combine_runs and split_export_by_run: + raise ValueError( + "'combine_runs' cannot be used with 'split_export_by_run'. " + "Combining merges identical channels across runs into a single column, " + "which is not possible when each run is split into a separate file." 
+ ) + + channel_ids = ( + [c._id_or_error if isinstance(c, Channel) else c for c in channels] if channels else [] + ) + resolved_calc_channels = await self._resolve_calculated_channels(calculated_channels) + + job_id = await self._low_level_client.export_data( + run_ids=run_ids, + asset_ids=asset_ids, + output_format=output_format, + start_time=start_time, + stop_time=stop_time, + channel_ids=channel_ids, + calculated_channels=resolved_calc_channels, + simplify_channel_names=simplify_channel_names, + combine_runs=combine_runs, + split_export_by_asset=split_export_by_asset, + split_export_by_run=split_export_by_run, + ) + + return await self.client.async_.jobs.get(job_id=job_id) + async def export_by_run( self, *, @@ -153,36 +203,22 @@ async def export_by_run( raise ValueError("'runs' must not contain empty or null values.") if (start_time is None) != (stop_time is None): raise ValueError("'start_time' and 'stop_time' must both be provided or both omitted.") - if start_time and stop_time and start_time >= stop_time: - raise ValueError("'start_time' must be before 'stop_time'.") - if combine_runs and split_export_by_run: - raise ValueError( - "'combine_runs' cannot be used with 'split_export_by_run'. " - "Combining merges identical channels across runs into a single column, " - "which is not possible when each run is split into a separate file." 
- ) run_ids = [r._id_or_error if isinstance(r, Run) else r for r in runs] - channel_ids = ( - [c._id_or_error if isinstance(c, Channel) else c for c in channels] if channels else [] - ) - resolved_calc_channels = await self._resolve_calculated_channels(calculated_channels) - job_id = await self._low_level_client.export_data( + return await self._export( run_ids=run_ids, output_format=output_format, start_time=start_time, stop_time=stop_time, - channel_ids=channel_ids, - calculated_channels=resolved_calc_channels, + channels=channels, + calculated_channels=calculated_channels, simplify_channel_names=simplify_channel_names, combine_runs=combine_runs, split_export_by_asset=split_export_by_asset, split_export_by_run=split_export_by_run, ) - return await self.client.async_.jobs.get(job_id=job_id) - async def export_by_asset( self, *, @@ -224,36 +260,22 @@ async def export_by_asset( raise ValueError("'assets' must be a non-empty list of asset objects or asset IDs.") if any(not asset for asset in assets): raise ValueError("'assets' must not contain empty or null values.") - if start_time >= stop_time: - raise ValueError("'start_time' must be before 'stop_time'.") - if combine_runs and split_export_by_run: - raise ValueError( - "'combine_runs' cannot be used with 'split_export_by_run'. " - "Combining merges identical channels across runs into a single column, " - "which is not possible when each run is split into a separate file." 
- ) asset_ids = [a._id_or_error if isinstance(a, Asset) else a for a in assets] - channel_ids = ( - [c._id_or_error if isinstance(c, Channel) else c for c in channels] if channels else [] - ) - resolved_calc_channels = await self._resolve_calculated_channels(calculated_channels) - job_id = await self._low_level_client.export_data( + return await self._export( asset_ids=asset_ids, start_time=start_time, stop_time=stop_time, output_format=output_format, - channel_ids=channel_ids, - calculated_channels=resolved_calc_channels, + channels=channels, + calculated_channels=calculated_channels, simplify_channel_names=simplify_channel_names, combine_runs=combine_runs, split_export_by_asset=split_export_by_asset, split_export_by_run=split_export_by_run, ) - return await self.client.async_.jobs.get(job_id=job_id) - async def export_by_time_range( self, *, @@ -298,34 +320,19 @@ async def export_by_time_range( "At least one of 'channels' or 'calculated_channels' must be provided " "when exporting by time range." ) - if start_time >= stop_time: - raise ValueError("'start_time' must be before 'stop_time'.") - if combine_runs and split_export_by_run: - raise ValueError( - "'combine_runs' cannot be used with 'split_export_by_run'. " - "Combining merges identical channels across runs into a single column, " - "which is not possible when each run is split into a separate file." 
- ) - - channel_ids = ( - [c._id_or_error if isinstance(c, Channel) else c for c in channels] if channels else [] - ) - resolved_calc_channels = await self._resolve_calculated_channels(calculated_channels) - job_id = await self._low_level_client.export_data( + return await self._export( start_time=start_time, stop_time=stop_time, output_format=output_format, - channel_ids=channel_ids, - calculated_channels=resolved_calc_channels, + channels=channels, + calculated_channels=calculated_channels, simplify_channel_names=simplify_channel_names, combine_runs=combine_runs, split_export_by_asset=split_export_by_asset, split_export_by_run=split_export_by_run, ) - return await self.client.async_.jobs.get(job_id=job_id) - async def wait_and_download( self, *, From 6e78e03ed043fea7c4a1e74988639036acb4b0c7 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Wed, 18 Mar 2026 16:26:26 -0700 Subject: [PATCH 34/53] python(fix): use asyncio.get_running_loop() instead of deprecated get_event_loop() --- .../_tests/resources/test_exports.py | 6 +-- python/lib/sift_client/resources/exports.py | 38 ++----------------- .../resources/sync_stubs/__init__.pyi | 31 --------------- 3 files changed, 6 insertions(+), 69 deletions(-) diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index 8b7de613e..767567110 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -619,7 +619,7 @@ async def test_returns_file_paths_on_success(self, completed_export_setup): mock_job = MagicMock(spec=Job) mock_job._id_or_error = "job-123" - with patch("asyncio.get_event_loop", return_value=s["mock_loop"]): + with patch("asyncio.get_running_loop", return_value=s["mock_loop"]): result = await s["api"].wait_and_download(job=mock_job, output_dir=s["tmp_path"]) assert result == [s["fake_file"]] @@ -632,7 +632,7 @@ async def test_returns_file_paths_on_success(self, 
completed_export_setup): async def test_accepts_job_id_string(self, completed_export_setup): s = completed_export_setup - with patch("asyncio.get_event_loop", return_value=s["mock_loop"]): + with patch("asyncio.get_running_loop", return_value=s["mock_loop"]): result = await s["api"].wait_and_download(job="job-456", output_dir=s["tmp_path"]) assert result == [s["fake_file"]] @@ -646,7 +646,7 @@ async def test_custom_polling_and_timeout(self, completed_export_setup): mock_job = MagicMock(spec=Job) mock_job._id_or_error = "job-123" - with patch("asyncio.get_event_loop", return_value=s["mock_loop"]): + with patch("asyncio.get_running_loop", return_value=s["mock_loop"]): await s["api"].wait_and_download( job=mock_job, polling_interval_secs=1, timeout_secs=10, output_dir=s["tmp_path"] ) diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index 7e1bf5ad7..c1e30f925 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -22,39 +22,7 @@ class ExportsAPIAsync(ResourceBase): - """High-level API for exporting data from Sift. - - Provides three export methods based on how you want to scope the data: - - - ``export_by_run`` - Export data from one or more runs. - - ``export_by_asset`` - Export data from one or more assets within a time range. - - ``export_by_time_range`` - Export data within a time range (requires channels or calculated_channels). - - Each method initiates the export and returns a Job handle. Use ``wait_and_download`` - to poll the job, download the export, and get the paths to the extracted files. 
- - Example:: - - from sift_client.sift_types.export import ExportOutputFormat - - # Export by run - run = await client.async_.runs.get(run_id="run-id-1") - job = await client.async_.exports.export_by_run( - runs=[run], - output_format=ExportOutputFormat.CSV, - ) - files = await client.async_.exports.wait_and_download(job=job) - - # Export by asset with time range - asset = await client.async_.assets.get(asset_id="asset-id-1") - job = await client.async_.exports.export_by_asset( - assets=[asset], - start_time=start, - stop_time=stop, - output_format=ExportOutputFormat.CSV, - ) - files = await client.async_.exports.wait_and_download(job=job) - """ + """High-level API for exporting data from Sift.""" def __init__(self, sift_client: SiftClient): """Initialize the ExportsAPI. @@ -389,8 +357,8 @@ async def wait_and_download( ) zip_path = output_dir / f"{job_id}.zip" - # Run the synchronous request in a thread pool to avoid blocking the event loop - loop = asyncio.get_event_loop() + # Run the synchronous download in a thread pool to avoid blocking the event loop + loop = asyncio.get_running_loop() extracted_files = await loop.run_in_executor( None, download_and_extract_zip, presigned_url, zip_path, output_dir ) diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi index 49c04ee73..fe00829aa 100644 --- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi +++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi @@ -542,37 +542,6 @@ class ExportsAPI: """Sync counterpart to `ExportsAPIAsync`. High-level API for exporting data from Sift. - - Provides three export methods based on how you want to scope the data: - - - ``export_by_run`` - Export data from one or more runs. - - ``export_by_asset`` - Export data from one or more assets within a time range. - - ``export_by_time_range`` - Export data within a time range (requires channels or calculated_channels). 
- - Each method initiates the export and returns a Job handle. Use ``wait_and_download`` - to poll the job, download the export, and get the paths to the extracted files. - - Example:: - - from sift_client.sift_types.export import ExportOutputFormat - - # Export by run - run = await client.async_.runs.get(run_id="run-id-1") - job = await client.async_.exports.export_by_run( - runs=[run], - output_format=ExportOutputFormat.CSV, - ) - files = await client.async_.exports.wait_and_download(job=job) - - # Export by asset with time range - asset = await client.async_.assets.get(asset_id="asset-id-1") - job = await client.async_.exports.export_by_asset( - assets=[asset], - start_time=start, - stop_time=stop, - output_format=ExportOutputFormat.CSV, - ) - files = await client.async_.exports.wait_and_download(job=job) """ def __init__(self, sift_client: SiftClient): From 61a355c622d25f0dde6302f9343489cf37f415a9 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Wed, 18 Mar 2026 17:20:10 -0700 Subject: [PATCH 35/53] python(fix): add integration tests and combined duplicate unit tests --- .../_tests/resources/test_exports.py | 324 +++++++++++------- 1 file changed, 194 insertions(+), 130 deletions(-) diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index 767567110..e6981918d 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -9,12 +9,17 @@ from __future__ import annotations -from datetime import datetime, timezone +from datetime import datetime, timedelta, timezone +from typing import TYPE_CHECKING from unittest.mock import AsyncMock, MagicMock, patch import pytest from sift_client._internal.low_level_wrappers.exports import _build_calc_channel_configs + +if TYPE_CHECKING: + from sift_client import SiftClient +from sift_client.resources import ExportsAPI from sift_client.resources.exports import ExportsAPIAsync from 
sift_client.sift_types.asset import Asset from sift_client.sift_types.calculated_channel import ( @@ -31,6 +36,188 @@ STOP = datetime(2025, 1, 2, tzinfo=timezone.utc) +@pytest.mark.integration +def test_client_binding(sift_client): + assert sift_client.exports + assert isinstance(sift_client.exports, ExportsAPI) + assert sift_client.async_.exports + assert isinstance(sift_client.async_.exports, ExportsAPIAsync) + + +@pytest.fixture +def exports_api_async(sift_client: SiftClient): + return sift_client.async_.exports + + +@pytest.fixture +def exports_api_sync(sift_client: SiftClient): + return sift_client.exports + + +@pytest.mark.integration +class TestExportsIntegrationAsync: + """Integration tests for the async Exports API.""" + + class TestExportByRun: + @pytest.mark.asyncio + async def test_basic(self, exports_api_async, sift_client): + runs = await sift_client.async_.runs.list_(limit=1) + assert runs, "No runs available for integration test" + + job = await exports_api_async.export_by_run( + runs=[runs[0]], + output_format=ExportOutputFormat.CSV, + ) + assert isinstance(job, Job) + assert job.id_ is not None + + @pytest.mark.asyncio + async def test_with_run_id_string(self, exports_api_async, sift_client): + runs = await sift_client.async_.runs.list_(limit=1) + assert runs + + job = await exports_api_async.export_by_run( + runs=[runs[0].id_], + output_format=ExportOutputFormat.CSV, + ) + assert isinstance(job, Job) + + @pytest.mark.asyncio + async def test_with_time_range(self, exports_api_async, sift_client): + runs = await sift_client.async_.runs.list_(limit=1) + assert runs + + now = datetime.now(timezone.utc) + job = await exports_api_async.export_by_run( + runs=[runs[0]], + start_time=now - timedelta(hours=1), + stop_time=now, + output_format=ExportOutputFormat.CSV, + ) + assert isinstance(job, Job) + + class TestExportByAsset: + @pytest.mark.asyncio + async def test_basic(self, exports_api_async, sift_client): + assets = await 
sift_client.async_.assets.list_(limit=1) + assert assets, "No assets available for integration test" + + now = datetime.now(timezone.utc) + job = await exports_api_async.export_by_asset( + assets=[assets[0]], + start_time=now - timedelta(hours=1), + stop_time=now, + output_format=ExportOutputFormat.CSV, + ) + assert isinstance(job, Job) + assert job.id_ is not None + + @pytest.mark.asyncio + async def test_with_asset_id_string(self, exports_api_async, sift_client): + assets = await sift_client.async_.assets.list_(limit=1) + assert assets + + now = datetime.now(timezone.utc) + job = await exports_api_async.export_by_asset( + assets=[assets[0].id_], + start_time=now - timedelta(hours=1), + stop_time=now, + output_format=ExportOutputFormat.CSV, + ) + assert isinstance(job, Job) + + class TestExportByTimeRange: + @pytest.mark.asyncio + async def test_with_channels(self, exports_api_async, sift_client): + channels = await sift_client.async_.channels.list_(limit=1) + assert channels, "No channels available for integration test" + + now = datetime.now(timezone.utc) + job = await exports_api_async.export_by_time_range( + start_time=now - timedelta(hours=1), + stop_time=now, + channels=[channels[0]], + output_format=ExportOutputFormat.CSV, + ) + assert isinstance(job, Job) + assert job.id_ is not None + + @pytest.mark.asyncio + async def test_with_channel_id_string(self, exports_api_async, sift_client): + channels = await sift_client.async_.channels.list_(limit=1) + assert channels + + now = datetime.now(timezone.utc) + job = await exports_api_async.export_by_time_range( + start_time=now - timedelta(hours=1), + stop_time=now, + channels=[channels[0].id_], + output_format=ExportOutputFormat.CSV, + ) + assert isinstance(job, Job) + + class TestWaitAndDownload: + @pytest.mark.asyncio + async def test_full_export_flow(self, exports_api_async, sift_client, tmp_path): + runs = await sift_client.async_.runs.list_(limit=1) + assert runs, "No runs available for integration test" + + 
job = await exports_api_async.export_by_run( + runs=[runs[0]], + output_format=ExportOutputFormat.CSV, + ) + files = await exports_api_async.wait_and_download( + job=job, + output_dir=tmp_path, + timeout_secs=60, + ) + assert isinstance(files, list) + assert len(files) > 0 + assert all(f.exists() for f in files) + + +@pytest.mark.integration +class TestExportsIntegrationSync: + """Integration tests for the sync Exports API.""" + + def test_export_by_run(self, exports_api_sync, sift_client): + runs = sift_client.runs.list_(limit=1) + assert runs, "No runs available for integration test" + + job = exports_api_sync.export_by_run( + runs=[runs[0]], + output_format=ExportOutputFormat.CSV, + ) + assert isinstance(job, Job) + assert job.id_ is not None + + def test_export_by_asset(self, exports_api_sync, sift_client): + assets = sift_client.assets.list_(limit=1) + assert assets, "No assets available for integration test" + + now = datetime.now(timezone.utc) + job = exports_api_sync.export_by_asset( + assets=[assets[0]], + start_time=now - timedelta(hours=1), + stop_time=now, + output_format=ExportOutputFormat.CSV, + ) + assert isinstance(job, Job) + + def test_export_by_time_range(self, exports_api_sync, sift_client): + channels = sift_client.channels.list_(limit=1) + assert channels, "No channels available for integration test" + + now = datetime.now(timezone.utc) + job = exports_api_sync.export_by_time_range( + start_time=now - timedelta(hours=1), + stop_time=now, + channels=[channels[0]], + output_format=ExportOutputFormat.CSV, + ) + assert isinstance(job, Job) + + @pytest.fixture def mock_client(): """Create a mock SiftClient for unit testing.""" @@ -223,40 +410,6 @@ async def test_delegates_to_low_level_and_returns_job(self, exports_api): split_export_by_run=False, ) - @pytest.mark.asyncio - async def test_minimal_args(self, exports_api): - """Test that minimal arguments are passed correctly with defaults.""" - await exports_api.export_by_run( - runs=["run-1"], - 
output_format=ExportOutputFormat.SUN, - ) - - exports_api._low_level_client.export_data.assert_awaited_once_with( - run_ids=["run-1"], - asset_ids=None, - output_format=ExportOutputFormat.SUN, - start_time=None, - stop_time=None, - channel_ids=[], - calculated_channels=None, - simplify_channel_names=False, - combine_runs=False, - split_export_by_asset=False, - split_export_by_run=False, - ) - - @pytest.mark.asyncio - async def test_with_calculated_channels(self, exports_api, sample_calc_channels): - """Test that calculated channels are passed through to the low-level client.""" - await exports_api.export_by_run( - runs=["run-1"], - output_format=ExportOutputFormat.CSV, - calculated_channels=sample_calc_channels, - ) - - call_kwargs = exports_api._low_level_client.export_data.call_args.kwargs - assert call_kwargs["calculated_channels"] == sample_calc_channels - @pytest.mark.asyncio async def test_resolves_run_objects_to_ids(self, exports_api): """Test that Run domain objects are resolved to their IDs.""" @@ -344,20 +497,6 @@ async def test_delegates_to_low_level_and_returns_job(self, exports_api): split_export_by_run=False, ) - @pytest.mark.asyncio - async def test_with_calculated_channels(self, exports_api, sample_calc_channels): - await exports_api.export_by_asset( - assets=["asset-1"], - start_time=START, - stop_time=STOP, - output_format=ExportOutputFormat.CSV, - calculated_channels=sample_calc_channels, - ) - - call_kwargs = exports_api._low_level_client.export_data.call_args.kwargs - assert call_kwargs["calculated_channels"] == sample_calc_channels - assert call_kwargs["channel_ids"] == [] - @pytest.mark.asyncio async def test_resolves_asset_objects_to_ids(self, exports_api): mock_asset = MagicMock(spec=Asset) @@ -419,21 +558,6 @@ async def test_delegates_to_low_level_with_channels(self, exports_api): split_export_by_run=False, ) - @pytest.mark.asyncio - async def test_delegates_to_low_level_with_calc_channels( - self, exports_api, sample_calc_channels - ): - 
await exports_api.export_by_time_range( - start_time=START, - stop_time=STOP, - output_format=ExportOutputFormat.CSV, - calculated_channels=sample_calc_channels, - ) - - call_kwargs = exports_api._low_level_client.export_data.call_args.kwargs - assert call_kwargs["calculated_channels"] == sample_calc_channels - assert call_kwargs["channel_ids"] == [] - @pytest.mark.asyncio async def test_no_channels_raises(self, exports_api): with pytest.raises(ValueError, match=r"channels.*calculated_channels"): @@ -500,19 +624,10 @@ async def test_passes_through_none(self, exports_api): assert result is None @pytest.mark.asyncio - async def test_preserves_objects_when_identifiers_not_found( - self, exports_api, sample_calc_channels - ): - """channels.find returns None → identifiers assumed to be UUIDs, objects preserved.""" - result = await exports_api._resolve_calculated_channels(sample_calc_channels) - assert result[0] == sample_calc_channels[0] - assert result[1] == sample_calc_channels[1] - - @pytest.mark.asyncio - async def test_resolves_fetched_calculated_channel( + async def test_resolves_name_to_uuid( self, exports_api, mock_client, mock_calculated_channel, mock_resolved_channel ): - """A fetched CalculatedChannel's name-based identifier is resolved to a UUID.""" + """Name-based identifier is resolved to a UUID via channels.find.""" mock_client.async_.channels.find = AsyncMock(return_value=mock_resolved_channel) result = await exports_api._resolve_calculated_channels([mock_calculated_channel]) @@ -520,66 +635,15 @@ async def test_resolves_fetched_calculated_channel( assert len(result) == 1 resolved = result[0] assert isinstance(resolved, CalculatedChannelCreate) - assert resolved.name == "my_calc" - assert resolved.expression == "$1 + 10" - assert resolved.units == "m/s" - assert resolved.expression_channel_references is not None assert ( resolved.expression_channel_references[0].channel_identifier == "resolved-ch-uuid" ) - 
mock_client.async_.channels.find.assert_awaited_once_with( - name="sensor.velocity", assets=["asset-1"] - ) - - @pytest.mark.asyncio - async def test_keeps_identifier_when_not_found(self, exports_api, mock_calculated_channel): - """channels.find returns None → identifier kept as-is.""" - mock_calculated_channel.channel_references = [ - ChannelReference( - channel_reference="$1", - channel_identifier="d8e64798-ad6f-41b8-b830-7e009806f365", - ), - ] - - result = await exports_api._resolve_calculated_channels([mock_calculated_channel]) - resolved = result[0] - assert isinstance(resolved, CalculatedChannelCreate) - assert resolved.expression_channel_references is not None - assert ( - resolved.expression_channel_references[0].channel_identifier - == "d8e64798-ad6f-41b8-b830-7e009806f365" - ) @pytest.mark.asyncio - async def test_resolves_create_object_with_name_identifier( - self, exports_api, mock_client, mock_resolved_channel - ): - """A CalculatedChannelCreate with a name-based identifier gets resolved.""" - mock_resolved_channel._id_or_error = "d8e64798-ad6f-41b8-b830-7e009806f365" - mock_client.async_.channels.find = AsyncMock(return_value=mock_resolved_channel) - - inline_cc = CalculatedChannelCreate( - name="inline_calc", - expression="$1 + 30", - expression_channel_references=[ - ChannelReference( - channel_reference="$1", channel_identifier="DiningRoomLight.rssi" - ), - ], - ) - - result = await exports_api._resolve_calculated_channels([inline_cc]) - - resolved = result[0] - assert isinstance(resolved, CalculatedChannelCreate) - assert resolved.expression_channel_references is not None - assert ( - resolved.expression_channel_references[0].channel_identifier - == "d8e64798-ad6f-41b8-b830-7e009806f365" - ) - mock_client.async_.channels.find.assert_awaited_once_with( - name="DiningRoomLight.rssi", assets=None - ) + async def test_keeps_identifier_when_not_found(self, exports_api, sample_calc_channels): + """channels.find returns None → identifiers kept as-is.""" 
+ result = await exports_api._resolve_calculated_channels(sample_calc_channels) + assert result[0] == sample_calc_channels[0] @pytest.mark.asyncio async def test_mixed_create_and_existing( From 3fb45575dd7bb1b84e872a54afe5fee3d49af324 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Wed, 18 Mar 2026 17:28:09 -0700 Subject: [PATCH 36/53] pyright fix --- python/lib/sift_client/_tests/resources/test_exports.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index e6981918d..76a7aea86 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -631,10 +631,12 @@ async def test_resolves_name_to_uuid( mock_client.async_.channels.find = AsyncMock(return_value=mock_resolved_channel) result = await exports_api._resolve_calculated_channels([mock_calculated_channel]) + assert result is not None assert len(result) == 1 resolved = result[0] assert isinstance(resolved, CalculatedChannelCreate) + assert resolved.expression_channel_references is not None assert ( resolved.expression_channel_references[0].channel_identifier == "resolved-ch-uuid" ) From faf4af83c4ec844adb378a96c3743802074bfff1 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Wed, 18 Mar 2026 17:42:33 -0700 Subject: [PATCH 37/53] move _resolve_calculated_channels and download_and_extract_zip to _internal/util --- .../sift_client/_internal/util/channels.py | 54 ++++++++++++++++++ .../{ => _internal}/util/download.py | 0 .../_tests/resources/test_exports.py | 45 +++++++++------ python/lib/sift_client/resources/exports.py | 56 +++---------------- 4 files changed, 90 insertions(+), 65 deletions(-) create mode 100644 python/lib/sift_client/_internal/util/channels.py rename python/lib/sift_client/{ => _internal}/util/download.py (100%) diff --git a/python/lib/sift_client/_internal/util/channels.py 
b/python/lib/sift_client/_internal/util/channels.py new file mode 100644 index 000000000..ae9b975b6 --- /dev/null +++ b/python/lib/sift_client/_internal/util/channels.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from sift_client.sift_types.calculated_channel import CalculatedChannel, CalculatedChannelCreate +from sift_client.sift_types.channel import ChannelReference + +if TYPE_CHECKING: + from sift_client.resources.channels import ChannelsAPIAsync + + +async def resolve_calculated_channels( + calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None, + channels_api: ChannelsAPIAsync, +) -> list[CalculatedChannel | CalculatedChannelCreate] | None: + """Resolve channel reference identifiers from names to UUIDs. + + For each channel reference, looks up the identifier as a channel name. + If found, replaces it with the channel's UUID. If not found, assumes + the identifier is already a UUID and keeps it as-is. + """ + if not calculated_channels: + return calculated_channels + + resolved: list[CalculatedChannel | CalculatedChannelCreate] = [] + for cc in calculated_channels: + refs = ( + (cc.expression_channel_references or []) + if isinstance(cc, CalculatedChannelCreate) + else cc.channel_references + ) + + resolved_refs: list[ChannelReference] = [] + for ref in refs: + channel = await channels_api.find( + name=ref.channel_identifier, + assets=cc.asset_ids, + ) + if channel is not None: + ref = ChannelReference( + channel_reference=ref.channel_reference, + channel_identifier=channel._id_or_error, + ) + resolved_refs.append(ref) + + resolved.append( + CalculatedChannelCreate( + name=cc.name, + expression=cc.expression, + expression_channel_references=resolved_refs, + units=cc.units or None, + ) + ) + return resolved diff --git a/python/lib/sift_client/util/download.py b/python/lib/sift_client/_internal/util/download.py similarity index 100% rename from python/lib/sift_client/util/download.py rename 
to python/lib/sift_client/_internal/util/download.py diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index 76a7aea86..25aae85a9 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -16,6 +16,7 @@ import pytest from sift_client._internal.low_level_wrappers.exports import _build_calc_channel_configs +from sift_client._internal.util.channels import resolve_calculated_channels if TYPE_CHECKING: from sift_client import SiftClient @@ -616,21 +617,24 @@ async def test_combine_runs_with_split_by_run_raises(self, exports_api, method, ) class TestResolveCalculatedChannels: - """Tests for the _resolve_calculated_channels helper.""" + """Tests for the resolve_calculated_channels utility.""" @pytest.mark.asyncio - async def test_passes_through_none(self, exports_api): - result = await exports_api._resolve_calculated_channels(None) + async def test_passes_through_none(self): + mock_channels_api = MagicMock() + mock_channels_api.find = AsyncMock(return_value=None) + result = await resolve_calculated_channels(None, channels_api=mock_channels_api) assert result is None @pytest.mark.asyncio - async def test_resolves_name_to_uuid( - self, exports_api, mock_client, mock_calculated_channel, mock_resolved_channel - ): - """Name-based identifier is resolved to a UUID via channels.find.""" - mock_client.async_.channels.find = AsyncMock(return_value=mock_resolved_channel) + async def test_resolves_name_to_uuid(self, mock_calculated_channel, mock_resolved_channel): + """Name-based identifier is resolved to a UUID via channels_api.find.""" + mock_channels_api = MagicMock() + mock_channels_api.find = AsyncMock(return_value=mock_resolved_channel) - result = await exports_api._resolve_calculated_channels([mock_calculated_channel]) + result = await resolve_calculated_channels( + [mock_calculated_channel], channels_api=mock_channels_api + 
) assert result is not None assert len(result) == 1 @@ -642,16 +646,19 @@ async def test_resolves_name_to_uuid( ) @pytest.mark.asyncio - async def test_keeps_identifier_when_not_found(self, exports_api, sample_calc_channels): - """channels.find returns None → identifiers kept as-is.""" - result = await exports_api._resolve_calculated_channels(sample_calc_channels) + async def test_keeps_identifier_when_not_found(self, sample_calc_channels): + """channels_api.find returns None → identifiers kept as-is.""" + mock_channels_api = MagicMock() + mock_channels_api.find = AsyncMock(return_value=None) + result = await resolve_calculated_channels( + sample_calc_channels, channels_api=mock_channels_api + ) + assert result is not None assert result[0] == sample_calc_channels[0] @pytest.mark.asyncio async def test_mixed_create_and_existing( self, - exports_api, - mock_client, sample_calc_channels, mock_calculated_channel, mock_resolved_channel, @@ -665,15 +672,19 @@ async def test_mixed_create_and_existing( async def find_side_effect(name, assets=None): return mock_resolved_channel if name == "sensor.rpm" else None - mock_client.async_.channels.find = AsyncMock(side_effect=find_side_effect) + mock_channels_api = MagicMock() + mock_channels_api.find = AsyncMock(side_effect=find_side_effect) - result = await exports_api._resolve_calculated_channels( - [sample_calc_channels[0], mock_calculated_channel] + result = await resolve_calculated_channels( + [sample_calc_channels[0], mock_calculated_channel], + channels_api=mock_channels_api, ) + assert result is not None assert len(result) == 2 assert result[0] == sample_calc_channels[0] assert isinstance(result[1], CalculatedChannelCreate) + assert result[1].expression_channel_references is not None assert result[1].expression_channel_references[0].channel_identifier == "rpm-uuid" class TestWaitUntilComplete: diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index c1e30f925..e7c7f4e2f 
100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -6,19 +6,20 @@ from typing import TYPE_CHECKING from sift_client._internal.low_level_wrappers.exports import ExportsLowLevelClient +from sift_client._internal.util.channels import resolve_calculated_channels +from sift_client._internal.util.download import download_and_extract_zip from sift_client.resources._base import ResourceBase from sift_client.sift_types.asset import Asset -from sift_client.sift_types.calculated_channel import CalculatedChannel, CalculatedChannelCreate -from sift_client.sift_types.channel import Channel, ChannelReference +from sift_client.sift_types.channel import Channel from sift_client.sift_types.export import ExportOutputFormat # noqa: TC001 from sift_client.sift_types.job import Job from sift_client.sift_types.run import Run -from sift_client.util.download import download_and_extract_zip if TYPE_CHECKING: from datetime import datetime from sift_client.client import SiftClient + from sift_client.sift_types.calculated_channel import CalculatedChannel, CalculatedChannelCreate class ExportsAPIAsync(ResourceBase): @@ -33,50 +34,6 @@ def __init__(self, sift_client: SiftClient): super().__init__(sift_client) self._low_level_client = ExportsLowLevelClient(grpc_client=self.client.grpc_client) - async def _resolve_calculated_channels( - self, - calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None, - ) -> list[CalculatedChannel | CalculatedChannelCreate] | None: - """Resolve channel reference identifiers from names to UUIDs. - - For each channel reference, looks up the identifier as a channel name. - If found, replaces it with the channel's UUID. If not found, assumes - the identifier is already a UUID and keeps it as-is. 
- """ - if not calculated_channels: - return calculated_channels - - resolved: list[CalculatedChannel | CalculatedChannelCreate] = [] - for cc in calculated_channels: - refs = ( - (cc.expression_channel_references or []) - if isinstance(cc, CalculatedChannelCreate) - else cc.channel_references - ) - - resolved_refs: list[ChannelReference] = [] - for ref in refs: - channel = await self.client.async_.channels.find( - name=ref.channel_identifier, - assets=cc.asset_ids, - ) - if channel is not None: - ref = ChannelReference( - channel_reference=ref.channel_reference, - channel_identifier=channel._id_or_error, - ) - resolved_refs.append(ref) - - resolved.append( - CalculatedChannelCreate( - name=cc.name, - expression=cc.expression, - expression_channel_references=resolved_refs, - units=cc.units or None, - ) - ) - return resolved - async def _export( self, *, @@ -109,7 +66,10 @@ async def _export( channel_ids = ( [c._id_or_error if isinstance(c, Channel) else c for c in channels] if channels else [] ) - resolved_calc_channels = await self._resolve_calculated_channels(calculated_channels) + resolved_calc_channels = await resolve_calculated_channels( + calculated_channels, + channels_api=self.client.async_.channels, + ) job_id = await self._low_level_client.export_data( run_ids=run_ids, From edca0adadafec3390ed97bfd6975675692d7fe9a Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Wed, 18 Mar 2026 18:16:58 -0700 Subject: [PATCH 38/53] add extract parameter to wait_and_download can keep the zip without extracting --- .../sift_client/_internal/util/channels.py | 2 +- .../sift_client/_internal/util/download.py | 20 ++++++++++++------- .../_tests/resources/test_exports.py | 20 +++++++++++++++++++ python/lib/sift_client/resources/exports.py | 10 ++++++++-- .../resources/sync_stubs/__init__.pyi | 7 ++++++- 5 files changed, 48 insertions(+), 11 deletions(-) diff --git a/python/lib/sift_client/_internal/util/channels.py b/python/lib/sift_client/_internal/util/channels.py index 
ae9b975b6..a3054f317 100644 --- a/python/lib/sift_client/_internal/util/channels.py +++ b/python/lib/sift_client/_internal/util/channels.py @@ -12,7 +12,7 @@ async def resolve_calculated_channels( calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None, channels_api: ChannelsAPIAsync, -) -> list[CalculatedChannel | CalculatedChannelCreate] | None: +) -> list[CalculatedChannelCreate] | None: """Resolve channel reference identifiers from names to UUIDs. For each channel reference, looks up the identifier as a channel name. diff --git a/python/lib/sift_client/_internal/util/download.py b/python/lib/sift_client/_internal/util/download.py index 5612df6fa..fce09f489 100644 --- a/python/lib/sift_client/_internal/util/download.py +++ b/python/lib/sift_client/_internal/util/download.py @@ -1,7 +1,6 @@ from __future__ import annotations import zipfile -from pathlib import Path from typing import TYPE_CHECKING import requests @@ -10,20 +9,25 @@ from pathlib import Path -def download_and_extract_zip(url: str, zip_path: Path, output_dir: Path) -> list[Path]: - """Download a zip file from a URL and extract its contents. +def download_and_extract_zip( + url: str, zip_path: Path, output_dir: Path, *, extract: bool = True +) -> list[Path]: + """Download a zip file from a URL and optionally extract its contents. - Downloads the file in streaming 4 MiB chunks, extracts all contents - to the output directory, then removes the zip file. + Downloads the file in streaming 4 MiB chunks. If extract is True, + extracts all contents to the output directory and removes the zip file. Args: url: The URL to download the zip file from. - zip_path: Path where the zip file will be temporarily saved. + zip_path: Path where the zip file will be saved. output_dir: Directory to extract the zip contents into. Created if it doesn't exist. + extract: If True (default), extract the zip and delete it. + If False, keep the zip file as-is. 
Returns: - List of paths to the extracted files (excludes directories). + List of paths to the extracted files (excludes directories), + or a single-element list containing the zip path if extract is False. Raises: requests.HTTPError: If the download request fails. @@ -36,6 +40,8 @@ def download_and_extract_zip(url: str, zip_path: Path, output_dir: Path) -> list for chunk in response.iter_content(chunk_size=4194304): # 4 MiB if chunk: file.write(chunk) + if not extract: + return [zip_path] with zipfile.ZipFile(zip_path, "r") as zip_file: names = zip_file.namelist() zip_file.extractall(output_dir) diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index 25aae85a9..e60c1c562 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -717,6 +717,26 @@ async def test_accepts_job_id_string(self, completed_export_setup): job="job-456", polling_interval_secs=5, timeout_secs=None ) + @pytest.mark.asyncio + async def test_extract_false_returns_zip_path(self, completed_export_setup): + s = completed_export_setup + mock_job = MagicMock(spec=Job) + mock_job._id_or_error = "job-123" + + fake_zip = s["tmp_path"] / "job-123.zip" + s["mock_loop"].run_in_executor = AsyncMock(return_value=[fake_zip]) + + with patch("asyncio.get_running_loop", return_value=s["mock_loop"]): + result = await s["api"].wait_and_download( + job=mock_job, output_dir=s["tmp_path"], extract=False + ) + + assert result == [fake_zip] + # Verify the lambda called download_and_extract_zip with extract=False + call_args = s["mock_loop"].run_in_executor.call_args + fn = call_args[0][1] # the lambda + assert callable(fn) + @pytest.mark.asyncio async def test_custom_polling_and_timeout(self, completed_export_setup): s = completed_export_setup diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index 
e7c7f4e2f..69034120a 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -268,6 +268,7 @@ async def wait_and_download( polling_interval_secs: int = 5, timeout_secs: int | None = None, output_dir: str | Path | None = None, + extract: bool = True, ) -> list[Path]: """Wait for an export job to complete and download the exported files. @@ -280,9 +281,13 @@ async def wait_and_download( timeout_secs: Maximum seconds to wait. If None, polls indefinitely. output_dir: Directory to save the extracted files. If omitted, a temporary directory is created automatically. + extract: If True (default), extract the zip and delete it, + returning paths to the extracted files. If False, keep the + zip file and return its path. Returns: - List of paths to the extracted data files. + List of paths to the extracted data files, or a single-element + list containing the zip path if extract is False. Raises: RuntimeError: If the export job fails or is cancelled. @@ -320,7 +325,8 @@ async def wait_and_download( # Run the synchronous download in a thread pool to avoid blocking the event loop loop = asyncio.get_running_loop() extracted_files = await loop.run_in_executor( - None, download_and_extract_zip, presigned_url, zip_path, output_dir + None, + lambda: download_and_extract_zip(presigned_url, zip_path, output_dir, extract=extract), ) return extracted_files diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi index fe00829aa..c56b9d6c2 100644 --- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi +++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi @@ -680,6 +680,7 @@ class ExportsAPI: polling_interval_secs: int = 5, timeout_secs: int | None = None, output_dir: str | Path | None = None, + extract: bool = True, ) -> list[Path]: """Wait for an export job to complete and download the exported files. 
@@ -692,9 +693,13 @@ class ExportsAPI: timeout_secs: Maximum seconds to wait. If None, polls indefinitely. output_dir: Directory to save the extracted files. If omitted, a temporary directory is created automatically. + extract: If True (default), extract the zip and delete it, + returning paths to the extracted files. If False, keep the + zip file and return its path. Returns: - List of paths to the extracted data files. + List of paths to the extracted data files, or a single-element + list containing the zip path if extract is False. Raises: RuntimeError: If the export job fails or is cancelled. From 4194dd0c5e44dcb6fbcf68b6ad18cde315e45ceb Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Thu, 19 Mar 2026 13:03:54 -0700 Subject: [PATCH 39/53] refactored export methods to a single entry point --- .../_internal/low_level_wrappers/exports.py | 47 +- .../_tests/resources/test_exports.py | 1012 ++++++----------- python/lib/sift_client/resources/exports.py | 249 +--- .../resources/sync_stubs/__init__.pyi | 135 +-- 4 files changed, 455 insertions(+), 988 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/exports.py b/python/lib/sift_client/_internal/low_level_wrappers/exports.py index f7005d2dd..326462bf1 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/exports.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/exports.py @@ -90,24 +90,10 @@ async def export_data( ) -> str: """Initiate a data export. - Builds the ExportDataRequest proto and makes the gRPC call. The export - scope is determined by which ID list is provided: - - run_ids: export by run - - asset_ids: export by asset (requires start_time/stop_time) - - neither: export by time range (requires start_time/stop_time) - - Args: - output_format: The export format enum. - run_ids: Optional list of run IDs (export by run). - asset_ids: Optional list of asset IDs (export by asset). - start_time: Optional start time for the export. 
- stop_time: Optional stop time for the export. - channel_ids: Optional list of channel IDs to include. - calculated_channels: Optional calculated channel objects to include. - simplify_channel_names: Simplify channel names if unique. - combine_runs: Combine identical channels across runs. - split_export_by_asset: Split export by asset. - split_export_by_run: Split export by run. + Builds the ExportDataRequest proto and makes the gRPC call. + Sets whichever time_selection oneof fields are provided + (run_ids, asset_ids, or time range); the server validates + the request. Returns: The job ID for the background export. @@ -133,25 +119,20 @@ async def export_data( runs_and_time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) request.runs_and_time_range.CopyFrom(runs_and_time_range) - elif asset_ids is not None: - if start_time is None or stop_time is None: - raise ValueError( - "start_time and stop_time must be provided when exporting by asset." - ) - + if asset_ids is not None: assets_and_time_range = AssetsAndTimeRange(asset_ids=asset_ids) - assets_and_time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) - assets_and_time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) + if start_time: + assets_and_time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) + if stop_time: + assets_and_time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) request.assets_and_time_range.CopyFrom(assets_and_time_range) - else: - if start_time is None or stop_time is None: - raise ValueError( - "start_time and stop_time must be provided when exporting by time range." 
- ) + if run_ids is None and asset_ids is None: time_range = TimeRange() - time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) - time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) + if start_time: + time_range.start_time.CopyFrom(to_pb_timestamp(start_time)) + if stop_time: + time_range.stop_time.CopyFrom(to_pb_timestamp(stop_time)) request.time_range.CopyFrom(time_range) response = await self._grpc_client.get_stub(ExportServiceStub).ExportData(request) diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index e60c1c562..455b1463f 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -1,11 +1,4 @@ -"""Pytest tests for the Exports API. - -These tests validate the usage of the ExportsAPIAsync including: -- Correct delegation to the low-level client for all three export methods -- Domain object resolution (Run -> run_id, Asset -> asset_id, Channel -> channel_id) -- Job lifecycle: export methods return Job, wait_and_download returns list of file paths -- Input validation and error handling -""" +"""Tests for the Exports API.""" from __future__ import annotations @@ -35,14 +28,7 @@ START = datetime(2025, 1, 1, tzinfo=timezone.utc) STOP = datetime(2025, 1, 2, tzinfo=timezone.utc) - - -@pytest.mark.integration -def test_client_binding(sift_client): - assert sift_client.exports - assert isinstance(sift_client.exports, ExportsAPI) - assert sift_client.async_.exports - assert isinstance(sift_client.async_.exports, ExportsAPIAsync) +CSV = ExportOutputFormat.CSV @pytest.fixture @@ -55,173 +41,8 @@ def exports_api_sync(sift_client: SiftClient): return sift_client.exports -@pytest.mark.integration -class TestExportsIntegrationAsync: - """Integration tests for the async Exports API.""" - - class TestExportByRun: - @pytest.mark.asyncio - async def test_basic(self, exports_api_async, sift_client): - runs = await 
sift_client.async_.runs.list_(limit=1) - assert runs, "No runs available for integration test" - - job = await exports_api_async.export_by_run( - runs=[runs[0]], - output_format=ExportOutputFormat.CSV, - ) - assert isinstance(job, Job) - assert job.id_ is not None - - @pytest.mark.asyncio - async def test_with_run_id_string(self, exports_api_async, sift_client): - runs = await sift_client.async_.runs.list_(limit=1) - assert runs - - job = await exports_api_async.export_by_run( - runs=[runs[0].id_], - output_format=ExportOutputFormat.CSV, - ) - assert isinstance(job, Job) - - @pytest.mark.asyncio - async def test_with_time_range(self, exports_api_async, sift_client): - runs = await sift_client.async_.runs.list_(limit=1) - assert runs - - now = datetime.now(timezone.utc) - job = await exports_api_async.export_by_run( - runs=[runs[0]], - start_time=now - timedelta(hours=1), - stop_time=now, - output_format=ExportOutputFormat.CSV, - ) - assert isinstance(job, Job) - - class TestExportByAsset: - @pytest.mark.asyncio - async def test_basic(self, exports_api_async, sift_client): - assets = await sift_client.async_.assets.list_(limit=1) - assert assets, "No assets available for integration test" - - now = datetime.now(timezone.utc) - job = await exports_api_async.export_by_asset( - assets=[assets[0]], - start_time=now - timedelta(hours=1), - stop_time=now, - output_format=ExportOutputFormat.CSV, - ) - assert isinstance(job, Job) - assert job.id_ is not None - - @pytest.mark.asyncio - async def test_with_asset_id_string(self, exports_api_async, sift_client): - assets = await sift_client.async_.assets.list_(limit=1) - assert assets - - now = datetime.now(timezone.utc) - job = await exports_api_async.export_by_asset( - assets=[assets[0].id_], - start_time=now - timedelta(hours=1), - stop_time=now, - output_format=ExportOutputFormat.CSV, - ) - assert isinstance(job, Job) - - class TestExportByTimeRange: - @pytest.mark.asyncio - async def test_with_channels(self, 
exports_api_async, sift_client): - channels = await sift_client.async_.channels.list_(limit=1) - assert channels, "No channels available for integration test" - - now = datetime.now(timezone.utc) - job = await exports_api_async.export_by_time_range( - start_time=now - timedelta(hours=1), - stop_time=now, - channels=[channels[0]], - output_format=ExportOutputFormat.CSV, - ) - assert isinstance(job, Job) - assert job.id_ is not None - - @pytest.mark.asyncio - async def test_with_channel_id_string(self, exports_api_async, sift_client): - channels = await sift_client.async_.channels.list_(limit=1) - assert channels - - now = datetime.now(timezone.utc) - job = await exports_api_async.export_by_time_range( - start_time=now - timedelta(hours=1), - stop_time=now, - channels=[channels[0].id_], - output_format=ExportOutputFormat.CSV, - ) - assert isinstance(job, Job) - - class TestWaitAndDownload: - @pytest.mark.asyncio - async def test_full_export_flow(self, exports_api_async, sift_client, tmp_path): - runs = await sift_client.async_.runs.list_(limit=1) - assert runs, "No runs available for integration test" - - job = await exports_api_async.export_by_run( - runs=[runs[0]], - output_format=ExportOutputFormat.CSV, - ) - files = await exports_api_async.wait_and_download( - job=job, - output_dir=tmp_path, - timeout_secs=60, - ) - assert isinstance(files, list) - assert len(files) > 0 - assert all(f.exists() for f in files) - - -@pytest.mark.integration -class TestExportsIntegrationSync: - """Integration tests for the sync Exports API.""" - - def test_export_by_run(self, exports_api_sync, sift_client): - runs = sift_client.runs.list_(limit=1) - assert runs, "No runs available for integration test" - - job = exports_api_sync.export_by_run( - runs=[runs[0]], - output_format=ExportOutputFormat.CSV, - ) - assert isinstance(job, Job) - assert job.id_ is not None - - def test_export_by_asset(self, exports_api_sync, sift_client): - assets = sift_client.assets.list_(limit=1) - assert 
assets, "No assets available for integration test" - - now = datetime.now(timezone.utc) - job = exports_api_sync.export_by_asset( - assets=[assets[0]], - start_time=now - timedelta(hours=1), - stop_time=now, - output_format=ExportOutputFormat.CSV, - ) - assert isinstance(job, Job) - - def test_export_by_time_range(self, exports_api_sync, sift_client): - channels = sift_client.channels.list_(limit=1) - assert channels, "No channels available for integration test" - - now = datetime.now(timezone.utc) - job = exports_api_sync.export_by_time_range( - start_time=now - timedelta(hours=1), - stop_time=now, - channels=[channels[0]], - output_format=ExportOutputFormat.CSV, - ) - assert isinstance(job, Job) - - @pytest.fixture def mock_client(): - """Create a mock SiftClient for unit testing.""" client = MagicMock() client.grpc_client = MagicMock() client.async_ = MagicMock() @@ -233,7 +54,6 @@ def mock_client(): @pytest.fixture def mock_job(): - """Create a mock Job returned by jobs.get.""" job = MagicMock(spec=Job) job._id_or_error = "job-123" job.job_status = JobStatus.FINISHED @@ -242,7 +62,6 @@ def mock_job(): @pytest.fixture def exports_api(mock_client, mock_job): - """Create an ExportsAPIAsync with a mocked low-level client.""" with patch("sift_client.resources.exports.ExportsLowLevelClient", autospec=True) as mock_ll: api = ExportsAPIAsync(mock_client) api._low_level_client = mock_ll.return_value @@ -251,530 +70,379 @@ def exports_api(mock_client, mock_job): return api -@pytest.fixture -def sample_calc_channels(): - """Create sample calculated channel definitions for testing.""" - return [ - CalculatedChannelCreate( - name="speed_doubled", - expression="$1 * 2", - expression_channel_references=[ - ChannelReference(channel_reference="$1", channel_identifier="ch-uuid-1"), - ], - units="m/s", - ), - CalculatedChannelCreate( - name="no_units", - expression="$1 + $2", - expression_channel_references=[ - ChannelReference(channel_reference="$1", 
channel_identifier="ch-uuid-1"), - ChannelReference(channel_reference="$2", channel_identifier="ch-uuid-2"), - ], - ), - ] - +@pytest.mark.integration +def test_client_binding(sift_client): + assert isinstance(sift_client.exports, ExportsAPI) + assert isinstance(sift_client.async_.exports, ExportsAPIAsync) -@pytest.fixture -def mock_calculated_channel(): - """Create a mock fetched CalculatedChannel with name-based channel_identifier.""" - cc = MagicMock(spec=CalculatedChannel) - cc.name = "my_calc" - cc.expression = "$1 + 10" - cc.units = "m/s" - cc.asset_ids = ["asset-1"] - cc.channel_references = [ - ChannelReference(channel_reference="$1", channel_identifier="sensor.velocity"), - ] - return cc +@pytest.mark.integration +class TestExportsIntegration: + @pytest.mark.asyncio + async def test_export_by_run(self, exports_api_async, sift_client): + runs = await sift_client.async_.runs.list_(limit=1) + assert runs, "No runs available" + job = await exports_api_async.export(runs=[runs[0]], output_format=CSV) + assert isinstance(job, Job) + assert job.id_ is not None -@pytest.fixture -def mock_resolved_channel(): - """Create a mock Channel returned by channels.find during resolution.""" - ch = MagicMock(spec=Channel) - ch._id_or_error = "resolved-ch-uuid" - return ch + @pytest.mark.asyncio + async def test_export_by_asset(self, exports_api_async, sift_client): + assets = await sift_client.async_.assets.list_(limit=1) + assert assets, "No assets available" + now = datetime.now(timezone.utc) + job = await exports_api_async.export( + assets=[assets[0]], + start_time=now - timedelta(hours=1), + stop_time=now, + output_format=CSV, + ) + assert isinstance(job, Job) + @pytest.mark.asyncio + async def test_export_by_time_range(self, exports_api_async, sift_client): + channels = await sift_client.async_.channels.list_(limit=1) + assert channels, "No channels available" + now = datetime.now(timezone.utc) + job = await exports_api_async.export( + start_time=now - 
timedelta(hours=1), + stop_time=now, + channels=[channels[0]], + output_format=CSV, + ) + assert isinstance(job, Job) -@pytest.fixture -def completed_export_setup(exports_api, mock_client, tmp_path): - """Set up mocks for a successful wait_and_download call. + @pytest.mark.asyncio + async def test_wait_and_download(self, exports_api_async, sift_client, tmp_path): + runs = await sift_client.async_.runs.list_(limit=1) + assert runs, "No runs available" + job = await exports_api_async.export(runs=[runs[0]], output_format=CSV) + files = await exports_api_async.wait_and_download( + job=job, output_dir=tmp_path, timeout_secs=60 + ) + assert len(files) > 0 + assert all(f.exists() for f in files) - Returns a dict with the exports_api, mock_client, tmp_path, and fake_file. - """ - completed_job = MagicMock(spec=Job) - completed_job.job_status = JobStatus.FINISHED - mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=completed_job) - exports_api._low_level_client.get_download_url = AsyncMock( - return_value="https://download.test/export.zip" - ) + def test_sync_export_by_run(self, exports_api_sync, sift_client): + runs = sift_client.runs.list_(limit=1) + assert runs, "No runs available" + job = exports_api_sync.export(runs=[runs[0]], output_format=CSV) + assert isinstance(job, Job) - fake_file = tmp_path / "data.csv" - fake_file.write_text("col1,col2\n1,2") + def test_sync_export_by_asset(self, exports_api_sync, sift_client): + assets = sift_client.assets.list_(limit=1) + assert assets, "No assets available" + now = datetime.now(timezone.utc) + job = exports_api_sync.export( + assets=[assets[0]], + start_time=now - timedelta(hours=1), + stop_time=now, + output_format=CSV, + ) + assert isinstance(job, Job) - mock_loop = MagicMock() - mock_loop.run_in_executor = AsyncMock(return_value=[fake_file]) + def test_sync_export_by_time_range(self, exports_api_sync, sift_client): + channels = sift_client.channels.list_(limit=1) + assert channels, "No channels 
available" + now = datetime.now(timezone.utc) + job = exports_api_sync.export( + start_time=now - timedelta(hours=1), + stop_time=now, + channels=[channels[0]], + output_format=CSV, + ) + assert isinstance(job, Job) - return { - "api": exports_api, - "client": mock_client, - "tmp_path": tmp_path, - "fake_file": fake_file, - "mock_loop": mock_loop, - } +class TestExportDelegation: + """Verify each mode correctly delegates to the low-level client.""" + + @pytest.mark.asyncio + async def test_by_runs(self, exports_api): + await exports_api.export( + runs=["run-1", "run-2"], + output_format=CSV, + start_time=START, + stop_time=STOP, + channels=["ch-1"], + simplify_channel_names=True, + combine_runs=True, + split_export_by_asset=True, + ) + exports_api._low_level_client.export_data.assert_awaited_once_with( + run_ids=["run-1", "run-2"], + asset_ids=None, + output_format=CSV, + start_time=START, + stop_time=STOP, + channel_ids=["ch-1"], + calculated_channels=None, + simplify_channel_names=True, + combine_runs=True, + split_export_by_asset=True, + split_export_by_run=False, + ) -class TestBuildCalcChannelConfigs: - """Tests for the _build_calc_channel_configs helper in the low-level client.""" + @pytest.mark.asyncio + async def test_by_assets(self, exports_api): + await exports_api.export( + assets=["asset-1"], + start_time=START, + stop_time=STOP, + output_format=CSV, + channels=["ch-1", "ch-2"], + ) + exports_api._low_level_client.export_data.assert_awaited_once_with( + run_ids=None, + asset_ids=["asset-1"], + start_time=START, + stop_time=STOP, + output_format=CSV, + channel_ids=["ch-1", "ch-2"], + calculated_channels=None, + simplify_channel_names=False, + combine_runs=False, + split_export_by_asset=False, + split_export_by_run=False, + ) - @pytest.mark.parametrize("input_val", [None, []]) - def test_returns_empty_list_for_empty_input(self, input_val): - """Test that None or empty list returns an empty list.""" - assert _build_calc_channel_configs(input_val) == [] + 
@pytest.mark.asyncio + async def test_by_time_range(self, exports_api): + await exports_api.export( + start_time=START, + stop_time=STOP, + output_format=ExportOutputFormat.SUN, + channels=["ch-1"], + ) + exports_api._low_level_client.export_data.assert_awaited_once_with( + run_ids=None, + asset_ids=None, + start_time=START, + stop_time=STOP, + output_format=ExportOutputFormat.SUN, + channel_ids=["ch-1"], + calculated_channels=None, + simplify_channel_names=False, + combine_runs=False, + split_export_by_asset=False, + split_export_by_run=False, + ) - def test_converts_create_objects_to_proto(self, sample_calc_channels): - """Test converting CalculatedChannelCreate objects to proto CalculatedChannelConfig.""" - result = _build_calc_channel_configs(sample_calc_channels) - assert len(result) == 2 - first = result[0] - assert first.name == "speed_doubled" - assert first.expression == "$1 * 2" - assert first.units == "m/s" - assert len(first.channel_references) == 1 - assert first.channel_references[0].channel_reference == "$1" - assert first.channel_references[0].channel_identifier == "ch-uuid-1" - - second = result[1] - assert second.name == "no_units" - assert second.units == "" # proto default for unset optional string - assert len(second.channel_references) == 2 - - def test_converts_existing_calculated_channel_to_proto(self): - """Test converting an existing CalculatedChannel (full model) to proto. - - Exercises the else-branch that reads from 'channel_references' - instead of 'expression_channel_references'. 
- """ - mock_cc = MagicMock(spec=CalculatedChannel) - mock_cc.name = "derived_speed" - mock_cc.expression = "$1 / $2" - mock_cc.channel_references = [ - ChannelReference(channel_reference="$1", channel_identifier="ch-distance"), - ChannelReference(channel_reference="$2", channel_identifier="ch-time"), +class TestDomainObjectResolution: + @pytest.mark.asyncio + async def test_run_objects_to_ids(self, exports_api): + mock_run = MagicMock(spec=Run) + mock_run._id_or_error = "resolved-run-id" + await exports_api.export(runs=[mock_run, "raw-id"], output_format=CSV) + assert exports_api._low_level_client.export_data.call_args.kwargs["run_ids"] == [ + "resolved-run-id", + "raw-id", ] - mock_cc.units = "m/s" - - result = _build_calc_channel_configs([mock_cc]) - assert len(result) == 1 - config = result[0] - assert config.name == "derived_speed" - assert config.expression == "$1 / $2" - assert config.units == "m/s" - assert len(config.channel_references) == 2 - assert config.channel_references[0].channel_identifier == "ch-distance" - assert config.channel_references[1].channel_identifier == "ch-time" - - -class TestExportsAPIAsync: - """Tests for the ExportsAPIAsync high-level client.""" - - class TestExportByRun: - """Tests for the export_by_run method.""" - - @pytest.mark.asyncio - async def test_delegates_to_low_level_and_returns_job(self, exports_api): - """Test that export_by_run passes correct args to low-level and returns a Job.""" - job = await exports_api.export_by_run( - runs=["run-1", "run-2"], - output_format=ExportOutputFormat.CSV, - start_time=START, - stop_time=STOP, - channels=["ch-1"], - simplify_channel_names=True, - combine_runs=True, - split_export_by_asset=True, - ) - assert isinstance(job, MagicMock) - exports_api._low_level_client.export_data.assert_awaited_once_with( - run_ids=["run-1", "run-2"], - asset_ids=None, - output_format=ExportOutputFormat.CSV, - start_time=START, - stop_time=STOP, - channel_ids=["ch-1"], - calculated_channels=None, - 
simplify_channel_names=True, - combine_runs=True, - split_export_by_asset=True, - split_export_by_run=False, - ) - - @pytest.mark.asyncio - async def test_resolves_run_objects_to_ids(self, exports_api): - """Test that Run domain objects are resolved to their IDs.""" - mock_run = MagicMock(spec=Run) - mock_run._id_or_error = "resolved-run-id" - - await exports_api.export_by_run( - runs=[mock_run, "raw-id"], - output_format=ExportOutputFormat.CSV, - ) + @pytest.mark.asyncio + async def test_asset_objects_to_ids(self, exports_api): + mock_asset = MagicMock(spec=Asset) + mock_asset._id_or_error = "resolved-asset-id" + await exports_api.export( + assets=[mock_asset, "raw-id"], start_time=START, stop_time=STOP, output_format=CSV + ) + assert exports_api._low_level_client.export_data.call_args.kwargs["asset_ids"] == [ + "resolved-asset-id", + "raw-id", + ] - call_kwargs = exports_api._low_level_client.export_data.call_args.kwargs - assert call_kwargs["run_ids"] == ["resolved-run-id", "raw-id"] + @pytest.mark.asyncio + async def test_channel_objects_to_ids(self, exports_api): + mock_ch = MagicMock(spec=Channel) + mock_ch._id_or_error = "resolved-ch-id" + await exports_api.export(runs=["run-1"], output_format=CSV, channels=[mock_ch, "raw-ch-id"]) + assert exports_api._low_level_client.export_data.call_args.kwargs["channel_ids"] == [ + "resolved-ch-id", + "raw-ch-id", + ] - @pytest.mark.asyncio - async def test_resolves_channel_objects_to_ids(self, exports_api): - """Test that Channel domain objects are resolved to their IDs.""" - mock_channel = MagicMock(spec=Channel) - mock_channel._id_or_error = "resolved-ch-id" - await exports_api.export_by_run( - runs=["run-1"], - output_format=ExportOutputFormat.CSV, - channels=[mock_channel, "raw-ch-id"], +class TestExportValidation: + @pytest.mark.asyncio + async def test_runs_and_assets_raises(self, exports_api): + with pytest.raises(ValueError, match="not both"): + await exports_api.export( + runs=["r"], assets=["a"], 
start_time=START, stop_time=STOP, output_format=CSV ) - call_kwargs = exports_api._low_level_client.export_data.call_args.kwargs - assert call_kwargs["channel_ids"] == ["resolved-ch-id", "raw-ch-id"] - - @pytest.mark.asyncio - async def test_empty_runs_raises(self, exports_api): - with pytest.raises(ValueError, match="runs"): - await exports_api.export_by_run(runs=[], output_format=ExportOutputFormat.CSV) - - @pytest.mark.asyncio - async def test_null_run_raises(self, exports_api): - with pytest.raises(ValueError, match="empty or null"): - await exports_api.export_by_run( - runs=["", "run-1"], output_format=ExportOutputFormat.CSV - ) - - @pytest.mark.asyncio - async def test_start_without_stop_raises(self, exports_api): - with pytest.raises(ValueError, match="both be provided or both omitted"): - await exports_api.export_by_run( - runs=["run-1"], - output_format=ExportOutputFormat.CSV, - start_time=START, - ) - - @pytest.mark.asyncio - async def test_stop_without_start_raises(self, exports_api): - with pytest.raises(ValueError, match="both be provided or both omitted"): - await exports_api.export_by_run( - runs=["run-1"], - output_format=ExportOutputFormat.CSV, - stop_time=STOP, - ) - - class TestExportByAsset: - """Tests for the export_by_asset method.""" - - @pytest.mark.asyncio - async def test_delegates_to_low_level_and_returns_job(self, exports_api): - job = await exports_api.export_by_asset( - assets=["asset-1"], - start_time=START, - stop_time=STOP, - output_format=ExportOutputFormat.CSV, - channels=["ch-1", "ch-2"], - ) + @pytest.mark.asyncio + async def test_nothing_provided_raises(self, exports_api): + with pytest.raises(ValueError, match="At least one"): + await exports_api.export(output_format=CSV) - assert isinstance(job, MagicMock) - exports_api._low_level_client.export_data.assert_awaited_once_with( - run_ids=None, - asset_ids=["asset-1"], - start_time=START, - stop_time=STOP, - output_format=ExportOutputFormat.CSV, - channel_ids=["ch-1", "ch-2"], - 
calculated_channels=None, - simplify_channel_names=False, - combine_runs=False, - split_export_by_asset=False, - split_export_by_run=False, - ) - @pytest.mark.asyncio - async def test_resolves_asset_objects_to_ids(self, exports_api): - mock_asset = MagicMock(spec=Asset) - mock_asset._id_or_error = "resolved-asset-id" +class TestBuildCalcChannelConfigs: + @pytest.mark.parametrize("input_val", [None, []]) + def test_empty_input(self, input_val): + assert _build_calc_channel_configs(input_val) == [] - await exports_api.export_by_asset( - assets=[mock_asset, "raw-id"], - start_time=START, - stop_time=STOP, - output_format=ExportOutputFormat.CSV, - ) + def test_create_objects(self): + ccs = [ + CalculatedChannelCreate( + name="speed_doubled", + expression="$1 * 2", + units="m/s", + expression_channel_references=[ + ChannelReference(channel_reference="$1", channel_identifier="ch-1") + ], + ), + CalculatedChannelCreate( + name="no_units", + expression="$1 + $2", + expression_channel_references=[ + ChannelReference(channel_reference="$1", channel_identifier="ch-1"), + ChannelReference(channel_reference="$2", channel_identifier="ch-2"), + ], + ), + ] + result = _build_calc_channel_configs(ccs) + assert len(result) == 2 + assert result[0].name == "speed_doubled" + assert result[0].units == "m/s" + assert result[1].name == "no_units" + assert result[1].units == "" + assert len(result[1].channel_references) == 2 + + def test_existing_calculated_channel(self): + cc = MagicMock(spec=CalculatedChannel) + cc.name, cc.expression, cc.units = "derived", "$1 / $2", "m/s" + cc.channel_references = [ + ChannelReference(channel_reference="$1", channel_identifier="ch-dist"), + ChannelReference(channel_reference="$2", channel_identifier="ch-time"), + ] + result = _build_calc_channel_configs([cc]) + assert len(result) == 1 + assert result[0].name == "derived" + assert [r.channel_identifier for r in result[0].channel_references] == [ + "ch-dist", + "ch-time", + ] - call_kwargs = 
exports_api._low_level_client.export_data.call_args.kwargs - assert call_kwargs["asset_ids"] == ["resolved-asset-id", "raw-id"] - - @pytest.mark.asyncio - async def test_empty_assets_raises(self, exports_api): - with pytest.raises(ValueError, match="assets"): - await exports_api.export_by_asset( - assets=[], - start_time=START, - stop_time=STOP, - output_format=ExportOutputFormat.CSV, - ) - - @pytest.mark.asyncio - async def test_null_asset_raises(self, exports_api): - with pytest.raises(ValueError, match="empty or null"): - await exports_api.export_by_asset( - assets=[""], - start_time=START, - stop_time=STOP, - output_format=ExportOutputFormat.CSV, - ) - - class TestExportByTimeRange: - """Tests for the export_by_time_range method.""" - - @pytest.mark.asyncio - async def test_delegates_to_low_level_with_channels(self, exports_api): - await exports_api.export_by_time_range( - start_time=START, - stop_time=STOP, - output_format=ExportOutputFormat.SUN, - channels=["ch-1"], - ) - exports_api._low_level_client.export_data.assert_awaited_once_with( - run_ids=None, - asset_ids=None, - start_time=START, - stop_time=STOP, - output_format=ExportOutputFormat.SUN, - channel_ids=["ch-1"], - calculated_channels=None, - simplify_channel_names=False, - combine_runs=False, - split_export_by_asset=False, - split_export_by_run=False, - ) +class TestResolveCalculatedChannels: + @pytest.mark.asyncio + async def test_none_passthrough(self): + api = MagicMock() + api.find = AsyncMock(return_value=None) + assert await resolve_calculated_channels(None, channels_api=api) is None + + @pytest.mark.asyncio + async def test_resolves_name_to_uuid(self): + mock_ch = MagicMock(spec=Channel) + mock_ch._id_or_error = "resolved-uuid" + api = MagicMock() + api.find = AsyncMock(return_value=mock_ch) + + cc = MagicMock(spec=CalculatedChannel) + cc.name, cc.expression, cc.units = "calc", "$1 + 10", "m/s" + cc.asset_ids = ["asset-1"] + cc.channel_references = [ + ChannelReference(channel_reference="$1", 
channel_identifier="sensor.vel") + ] - @pytest.mark.asyncio - async def test_no_channels_raises(self, exports_api): - with pytest.raises(ValueError, match=r"channels.*calculated_channels"): - await exports_api.export_by_time_range( - start_time=START, stop_time=STOP, output_format=ExportOutputFormat.CSV - ) - - class TestSharedValidation: - """Validation rules shared across all three export methods.""" - - @pytest.mark.asyncio - @pytest.mark.parametrize( - ("method", "kwargs"), - [ - ("export_by_run", {"runs": ["r-1"], "output_format": ExportOutputFormat.CSV}), - ("export_by_asset", {"assets": ["a-1"], "output_format": ExportOutputFormat.CSV}), - ( - "export_by_time_range", - {"output_format": ExportOutputFormat.CSV, "channels": ["ch-1"]}, - ), - ], - ) - async def test_start_after_stop_raises(self, exports_api, method, kwargs): - with pytest.raises(ValueError, match="start_time"): - await getattr(exports_api, method)(start_time=STOP, stop_time=START, **kwargs) - - @pytest.mark.asyncio - @pytest.mark.parametrize( - ("method", "kwargs"), - [ - ("export_by_run", {"runs": ["r-1"], "output_format": ExportOutputFormat.CSV}), - ( - "export_by_asset", - { - "assets": ["a-1"], - "output_format": ExportOutputFormat.CSV, - "start_time": START, - "stop_time": STOP, - }, - ), - ( - "export_by_time_range", - { - "output_format": ExportOutputFormat.CSV, - "channels": ["ch-1"], - "start_time": START, - "stop_time": STOP, - }, - ), + result = await resolve_calculated_channels([cc], channels_api=api) + assert result is not None + assert len(result) == 1 + assert result[0].expression_channel_references[0].channel_identifier == "resolved-uuid" + + @pytest.mark.asyncio + async def test_keeps_identifier_when_not_found(self): + api = MagicMock() + api.find = AsyncMock(return_value=None) + cc = CalculatedChannelCreate( + name="x", + expression="$1", + units="m", + expression_channel_references=[ + ChannelReference(channel_reference="$1", channel_identifier="ch-1") ], ) - async def 
test_combine_runs_with_split_by_run_raises(self, exports_api, method, kwargs): - with pytest.raises(ValueError, match="combine_runs.*split_export_by_run"): - await getattr(exports_api, method)( - combine_runs=True, split_export_by_run=True, **kwargs - ) - - class TestResolveCalculatedChannels: - """Tests for the resolve_calculated_channels utility.""" - - @pytest.mark.asyncio - async def test_passes_through_none(self): - mock_channels_api = MagicMock() - mock_channels_api.find = AsyncMock(return_value=None) - result = await resolve_calculated_channels(None, channels_api=mock_channels_api) - assert result is None - - @pytest.mark.asyncio - async def test_resolves_name_to_uuid(self, mock_calculated_channel, mock_resolved_channel): - """Name-based identifier is resolved to a UUID via channels_api.find.""" - mock_channels_api = MagicMock() - mock_channels_api.find = AsyncMock(return_value=mock_resolved_channel) - - result = await resolve_calculated_channels( - [mock_calculated_channel], channels_api=mock_channels_api - ) - assert result is not None - - assert len(result) == 1 - resolved = result[0] - assert isinstance(resolved, CalculatedChannelCreate) - assert resolved.expression_channel_references is not None - assert ( - resolved.expression_channel_references[0].channel_identifier == "resolved-ch-uuid" - ) + result = await resolve_calculated_channels([cc], channels_api=api) + assert result is not None + assert result[0] == cc - @pytest.mark.asyncio - async def test_keeps_identifier_when_not_found(self, sample_calc_channels): - """channels_api.find returns None → identifiers kept as-is.""" - mock_channels_api = MagicMock() - mock_channels_api.find = AsyncMock(return_value=None) - result = await resolve_calculated_channels( - sample_calc_channels, channels_api=mock_channels_api - ) - assert result is not None - assert result[0] == sample_calc_channels[0] - - @pytest.mark.asyncio - async def test_mixed_create_and_existing( - self, - sample_calc_channels, - 
mock_calculated_channel, - mock_resolved_channel, - ): - """Mix of CalculatedChannelCreate and CalculatedChannel resolves only names.""" - mock_calculated_channel.channel_references = [ - ChannelReference(channel_reference="$1", channel_identifier="sensor.rpm"), - ] - mock_resolved_channel._id_or_error = "rpm-uuid" - - async def find_side_effect(name, assets=None): - return mock_resolved_channel if name == "sensor.rpm" else None - - mock_channels_api = MagicMock() - mock_channels_api.find = AsyncMock(side_effect=find_side_effect) - - result = await resolve_calculated_channels( - [sample_calc_channels[0], mock_calculated_channel], - channels_api=mock_channels_api, - ) - - assert result is not None - assert len(result) == 2 - assert result[0] == sample_calc_channels[0] - assert isinstance(result[1], CalculatedChannelCreate) - assert result[1].expression_channel_references is not None - assert result[1].expression_channel_references[0].channel_identifier == "rpm-uuid" - - class TestWaitUntilComplete: - """Tests for the wait_and_download method.""" - @pytest.mark.asyncio - async def test_returns_file_paths_on_success(self, completed_export_setup): - s = completed_export_setup - mock_job = MagicMock(spec=Job) - mock_job._id_or_error = "job-123" - - with patch("asyncio.get_running_loop", return_value=s["mock_loop"]): - result = await s["api"].wait_and_download(job=mock_job, output_dir=s["tmp_path"]) +@pytest.fixture +def download_setup(exports_api, mock_client, tmp_path): + completed_job = MagicMock(spec=Job) + completed_job.job_status = JobStatus.FINISHED + mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=completed_job) + exports_api._low_level_client.get_download_url = AsyncMock( + return_value="https://dl.test/export.zip" + ) - assert result == [s["fake_file"]] - s["client"].async_.jobs.wait_until_complete.assert_awaited_once_with( - job="job-123", polling_interval_secs=5, timeout_secs=None - ) - 
s["api"]._low_level_client.get_download_url.assert_awaited_once_with(job_id="job-123") + fake_file = tmp_path / "data.csv" + fake_file.write_text("col1,col2\n1,2") + mock_loop = MagicMock() + mock_loop.run_in_executor = AsyncMock(return_value=[fake_file]) - @pytest.mark.asyncio - async def test_accepts_job_id_string(self, completed_export_setup): - s = completed_export_setup + return { + "api": exports_api, + "client": mock_client, + "tmp_path": tmp_path, + "fake_file": fake_file, + "loop": mock_loop, + } - with patch("asyncio.get_running_loop", return_value=s["mock_loop"]): - result = await s["api"].wait_and_download(job="job-456", output_dir=s["tmp_path"]) - assert result == [s["fake_file"]] - s["client"].async_.jobs.wait_until_complete.assert_awaited_once_with( - job="job-456", polling_interval_secs=5, timeout_secs=None - ) +class TestWaitAndDownload: + @pytest.mark.asyncio + async def test_success(self, download_setup): + s = download_setup + job = MagicMock(spec=Job) + job._id_or_error = "job-123" + with patch("asyncio.get_running_loop", return_value=s["loop"]): + result = await s["api"].wait_and_download(job=job, output_dir=s["tmp_path"]) + assert result == [s["fake_file"]] + s["client"].async_.jobs.wait_until_complete.assert_awaited_once_with( + job="job-123", polling_interval_secs=5, timeout_secs=None + ) - @pytest.mark.asyncio - async def test_extract_false_returns_zip_path(self, completed_export_setup): - s = completed_export_setup - mock_job = MagicMock(spec=Job) - mock_job._id_or_error = "job-123" - - fake_zip = s["tmp_path"] / "job-123.zip" - s["mock_loop"].run_in_executor = AsyncMock(return_value=[fake_zip]) - - with patch("asyncio.get_running_loop", return_value=s["mock_loop"]): - result = await s["api"].wait_and_download( - job=mock_job, output_dir=s["tmp_path"], extract=False - ) - - assert result == [fake_zip] - # Verify the lambda called download_and_extract_zip with extract=False - call_args = s["mock_loop"].run_in_executor.call_args - fn = 
call_args[0][1] # the lambda - assert callable(fn) - - @pytest.mark.asyncio - async def test_custom_polling_and_timeout(self, completed_export_setup): - s = completed_export_setup - mock_job = MagicMock(spec=Job) - mock_job._id_or_error = "job-123" - - with patch("asyncio.get_running_loop", return_value=s["mock_loop"]): - await s["api"].wait_and_download( - job=mock_job, polling_interval_secs=1, timeout_secs=10, output_dir=s["tmp_path"] - ) - - s["client"].async_.jobs.wait_until_complete.assert_awaited_once_with( - job="job-123", polling_interval_secs=1, timeout_secs=10 + @pytest.mark.asyncio + async def test_job_id_string(self, download_setup): + s = download_setup + with patch("asyncio.get_running_loop", return_value=s["loop"]): + result = await s["api"].wait_and_download(job="job-456", output_dir=s["tmp_path"]) + assert result == [s["fake_file"]] + + @pytest.mark.asyncio + async def test_custom_polling_and_timeout(self, download_setup): + s = download_setup + job = MagicMock(spec=Job) + job._id_or_error = "job-123" + with patch("asyncio.get_running_loop", return_value=s["loop"]): + await s["api"].wait_and_download( + job=job, polling_interval_secs=1, timeout_secs=10, output_dir=s["tmp_path"] ) - - @pytest.mark.asyncio - @pytest.mark.parametrize( - ("status", "details", "match"), - [ - ( - JobStatus.FAILED, - DataExportStatusDetails(error_message="out of memory"), - r"failed.*out of memory", - ), - (JobStatus.FAILED, None, "failed"), - (JobStatus.CANCELLED, None, "cancelled"), - ], + s["client"].async_.jobs.wait_until_complete.assert_awaited_once_with( + job="job-123", polling_interval_secs=1, timeout_secs=10 ) - async def test_terminal_job_status_raises( - self, exports_api, mock_client, status, details, match - ): - mock_job = MagicMock(spec=Job) - mock_job._id_or_error = "job-err" - - completed_job = MagicMock(spec=Job) - completed_job.job_status = status - completed_job.job_status_details = details - mock_client.async_.jobs.wait_until_complete = 
AsyncMock(return_value=completed_job) - - with pytest.raises(RuntimeError, match=match): - await exports_api.wait_and_download(job=mock_job) + + @pytest.mark.asyncio + @pytest.mark.parametrize( + ("status", "details", "match"), + [ + ( + JobStatus.FAILED, + DataExportStatusDetails(error_message="out of memory"), + r"failed.*out of memory", + ), + (JobStatus.FAILED, None, "failed"), + (JobStatus.CANCELLED, None, "cancelled"), + ], + ) + async def test_terminal_status_raises(self, exports_api, mock_client, status, details, match): + job = MagicMock(spec=Job) + job._id_or_error = "job-err" + completed = MagicMock(spec=Job) + completed.job_status = status + completed.job_status_details = details + mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=completed) + with pytest.raises(RuntimeError, match=match): + await exports_api.wait_and_download(job=job) diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index 69034120a..46b0c0473 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -34,14 +34,14 @@ def __init__(self, sift_client: SiftClient): super().__init__(sift_client) self._low_level_client = ExportsLowLevelClient(grpc_client=self.client.grpc_client) - async def _export( + async def export( self, *, output_format: ExportOutputFormat, + runs: list[str | Run] | None = None, + assets: list[str | Asset] | None = None, start_time: datetime | None = None, stop_time: datetime | None = None, - run_ids: list[str] | None = None, - asset_ids: list[str] | None = None, channels: list[str | Channel] | None = None, calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, simplify_channel_names: bool = False, @@ -49,20 +49,65 @@ async def _export( split_export_by_asset: bool = False, split_export_by_run: bool = False, ) -> Job: - """Shared implementation for all export methods. + """Export data from Sift. 
- Validates common constraints, resolves channels, calls the low-level - export API, and returns the resulting Job. - """ - if start_time is not None and stop_time is not None and start_time >= stop_time: - raise ValueError("'start_time' must be before 'stop_time'.") - if combine_runs and split_export_by_run: - raise ValueError( - "'combine_runs' cannot be used with 'split_export_by_run'. " - "Combining merges identical channels across runs into a single column, " - "which is not possible when each run is split into a separate file." - ) + Initiates an export on the server and returns a Job handle. Use + ``wait_and_download`` to poll for completion and download the files. + + There are three ways to scope the export, determined by which arguments + are provided: + + 1. **By runs** — provide ``runs``. The ``start_time``/``stop_time`` are + optional (if omitted, the full time range of each run is used). If no + ``channels`` or ``calculated_channels`` are provided, all channels + from the runs' assets are included. + + 2. **By assets** — provide ``assets``. Both ``start_time`` and + ``stop_time`` are **required**. If no ``channels`` or + ``calculated_channels`` are provided, all channels from the assets + are included. + + 3. **By time range only** — provide ``start_time`` and ``stop_time`` + without ``runs`` or ``assets``. At least one of ``channels`` or + ``calculated_channels`` **must** be provided to scope the data. + + You cannot provide both ``runs`` and ``assets`` at the same time. + + Args: + output_format: The file format for the export (CSV or Sun/WinPlot). + runs: One or more Run objects or run IDs to export data from. + assets: One or more Asset objects or asset IDs to export data from. + start_time: Start of the time range to export. Required when using + assets or time-range-only mode; optional when using runs. + stop_time: End of the time range to export. Required when using + assets or time-range-only mode; optional when using runs. 
+ channels: Channel objects or channel IDs to include. If omitted and + runs or assets are provided, all channels are exported. Required + (along with ``calculated_channels``) in time-range-only mode. + calculated_channels: Calculated channels to include in the export. + Accepts existing CalculatedChannel objects or + CalculatedChannelCreate definitions. + simplify_channel_names: Remove text preceding last period in channel + names, only if the resulting simplified name is unique. + combine_runs: Identical channels within the same asset across + multiple runs will be combined into a single column. + split_export_by_asset: Split each asset into a separate file, with + asset name removed from channel name display. + split_export_by_run: Split each run into a separate file, with run + name removed from channel name display. + Returns: + A Job handle for the pending export. + """ + if runs and assets: + raise ValueError("Provide either 'runs' or 'assets', not both.") + if not runs and not assets and not start_time and not stop_time: + raise ValueError("At least one of 'runs', 'assets', or a time range must be provided.") + + run_ids = [r._id_or_error if isinstance(r, Run) else r for r in runs] if runs else None + asset_ids = ( + [a._id_or_error if isinstance(a, Asset) else a for a in assets] if assets else None + ) channel_ids = ( [c._id_or_error if isinstance(c, Channel) else c for c in channels] if channels else [] ) @@ -87,180 +132,6 @@ async def _export( return await self.client.async_.jobs.get(job_id=job_id) - async def export_by_run( - self, - *, - runs: list[str | Run], - output_format: ExportOutputFormat, - start_time: datetime | None = None, - stop_time: datetime | None = None, - channels: list[str | Channel] | None = None, - calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, - simplify_channel_names: bool = False, - combine_runs: bool = False, - split_export_by_asset: bool = False, - split_export_by_run: bool = False, - ) -> 
Job: - """Export data scoped by one or more runs. - - Initiates the export on the server and returns a Job handle. Use - ``wait_and_download`` to poll for completion and get the download URL. - - If no start_time/stop_time are provided, the full time range of each run is used. - If no channels or calculated_channels are provided, all channels from - the run's assets are included. - - Args: - runs: One or more Run objects or run IDs to export data from. - output_format: The file format for the export (CSV or Sun/WinPlot). - start_time: Optional start time to narrow the export within the run(s). - stop_time: Optional stop time to narrow the export within the run(s). - channels: Optional list of Channel objects or channel IDs to include. If omitted, all channels are exported. - calculated_channels: Optional calculated channels to include in the export. Accepts existing CalculatedChannel objects or CalculatedChannelCreate definitions. - simplify_channel_names: Remove text preceding last period in channel names, only if the resulting simplified name is unique. - combine_runs: Identical channels within the same asset across multiple runs will be combined into a single column. - split_export_by_asset: Split each asset into a separate file, with asset name removed from channel name display. - split_export_by_run: Split each run into a separate file, with run name removed from channel name display. - - Returns: - A Job handle for the pending export. 
- """ - if not runs: - raise ValueError("'runs' must be a non-empty list of run objects or run ids.") - if any(not run for run in runs): - raise ValueError("'runs' must not contain empty or null values.") - if (start_time is None) != (stop_time is None): - raise ValueError("'start_time' and 'stop_time' must both be provided or both omitted.") - - run_ids = [r._id_or_error if isinstance(r, Run) else r for r in runs] - - return await self._export( - run_ids=run_ids, - output_format=output_format, - start_time=start_time, - stop_time=stop_time, - channels=channels, - calculated_channels=calculated_channels, - simplify_channel_names=simplify_channel_names, - combine_runs=combine_runs, - split_export_by_asset=split_export_by_asset, - split_export_by_run=split_export_by_run, - ) - - async def export_by_asset( - self, - *, - assets: list[str | Asset], - start_time: datetime, - stop_time: datetime, - output_format: ExportOutputFormat, - channels: list[str | Channel] | None = None, - calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, - simplify_channel_names: bool = False, - combine_runs: bool = False, - split_export_by_asset: bool = False, - split_export_by_run: bool = False, - ) -> Job: - """Export data scoped by one or more assets within a time range. - - Initiates the export on the server and returns a Job handle. Use - ``wait_and_download`` to poll for completion and get the download URL. - - Both start_time and stop_time are required. If no channels or - calculated_channels are provided, all channels from the assets are included. - - Args: - assets: One or more Asset objects or asset IDs to export data from. - start_time: Start of the time range to export. - stop_time: End of the time range to export. - output_format: The file format for the export (CSV, Parquet, or Sun/WinPlot). - channels: Optional list of Channel objects or channel IDs to include. If omitted, all channels are exported. 
- calculated_channels: Optional calculated channels to include in the export. Accepts existing CalculatedChannel objects or CalculatedChannelCreate definitions. - simplify_channel_names: Remove text preceding last period in channel names, only if the resulting simplified name is unique. - combine_runs: Identical channels within the same asset across multiple runs will be combined into a single column. - split_export_by_asset: Split each asset into a separate file, with asset name removed from channel name display. - split_export_by_run: Split each run into a separate file, with run name removed from channel name display. - - Returns: - A Job handle for the pending export. - """ - if not assets: - raise ValueError("'assets' must be a non-empty list of asset objects or asset IDs.") - if any(not asset for asset in assets): - raise ValueError("'assets' must not contain empty or null values.") - - asset_ids = [a._id_or_error if isinstance(a, Asset) else a for a in assets] - - return await self._export( - asset_ids=asset_ids, - start_time=start_time, - stop_time=stop_time, - output_format=output_format, - channels=channels, - calculated_channels=calculated_channels, - simplify_channel_names=simplify_channel_names, - combine_runs=combine_runs, - split_export_by_asset=split_export_by_asset, - split_export_by_run=split_export_by_run, - ) - - async def export_by_time_range( - self, - *, - start_time: datetime, - stop_time: datetime, - output_format: ExportOutputFormat, - channels: list[str | Channel] | None = None, - calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, - simplify_channel_names: bool = False, - combine_runs: bool = False, - split_export_by_asset: bool = False, - split_export_by_run: bool = False, - ) -> Job: - """Export data within a time range. - - Initiates the export on the server and returns a Job handle. Use - ``wait_and_download`` to poll for completion and get the download URL. 
- - Both start_time and stop_time are required. At least one of channels or - calculated_channels **must** be provided to scope the data, since there - are no runs or assets to infer channels from. - - Args: - start_time: Start of the time range to export. - stop_time: End of the time range to export. - output_format: The file format for the export (CSV, Parquet, or Sun/WinPlot). - channels: List of Channel objects or channel IDs to include in the export. - calculated_channels: Calculated channels to include in the export. Accepts existing CalculatedChannel objects or CalculatedChannelCreate definitions. - simplify_channel_names: Remove text preceding last period in channel names, only if the resulting simplified name is unique. - combine_runs: Identical channels within the same asset across multiple runs will be combined into a single column. - split_export_by_asset: Split each asset into a separate file, with asset name removed from channel name display. - split_export_by_run: Split each run into a separate file, with run name removed from channel name display. - - Returns: - A Job handle for the pending export. - - Raises: - ValueError: If neither channels nor calculated_channels is provided. - """ - if not channels and not calculated_channels: - raise ValueError( - "At least one of 'channels' or 'calculated_channels' must be provided " - "when exporting by time range." 
- ) - - return await self._export( - start_time=start_time, - stop_time=stop_time, - output_format=output_format, - channels=channels, - calculated_channels=calculated_channels, - simplify_channel_names=simplify_channel_names, - combine_runs=combine_runs, - split_export_by_asset=split_export_by_asset, - split_export_by_run=split_export_by_run, - ) - async def wait_and_download( self, *, diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi index c56b9d6c2..45857c284 100644 --- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi +++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi @@ -553,50 +553,12 @@ class ExportsAPI: ... def _run(self, coro): ... - def export_by_asset( + def export( self, *, - assets: list[str | Asset], - start_time: datetime, - stop_time: datetime, - output_format: ExportOutputFormat, - channels: list[str | Channel] | None = None, - calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, - simplify_channel_names: bool = False, - combine_runs: bool = False, - split_export_by_asset: bool = False, - split_export_by_run: bool = False, - ) -> Job: - """Export data scoped by one or more assets within a time range. - - Initiates the export on the server and returns a Job handle. Use - ``wait_and_download`` to poll for completion and get the download URL. - - Both start_time and stop_time are required. If no channels or - calculated_channels are provided, all channels from the assets are included. - - Args: - assets: One or more Asset objects or asset IDs to export data from. - start_time: Start of the time range to export. - stop_time: End of the time range to export. - output_format: The file format for the export (CSV, Parquet, or Sun/WinPlot). - channels: Optional list of Channel objects or channel IDs to include. If omitted, all channels are exported. - calculated_channels: Optional calculated channels to include in the export. 
Accepts existing CalculatedChannel objects or CalculatedChannelCreate definitions. - simplify_channel_names: Remove text preceding last period in channel names, only if the resulting simplified name is unique. - combine_runs: Identical channels within the same asset across multiple runs will be combined into a single column. - split_export_by_asset: Split each asset into a separate file, with asset name removed from channel name display. - split_export_by_run: Split each run into a separate file, with run name removed from channel name display. - - Returns: - A Job handle for the pending export. - """ - ... - - def export_by_run( - self, - *, - runs: list[str | Run], output_format: ExportOutputFormat, + runs: list[str | Run] | None = None, + assets: list[str | Asset] | None = None, start_time: datetime | None = None, stop_time: datetime | None = None, channels: list[str | Channel] | None = None, @@ -606,70 +568,55 @@ class ExportsAPI: split_export_by_asset: bool = False, split_export_by_run: bool = False, ) -> Job: - """Export data scoped by one or more runs. + """Export data from Sift. - Initiates the export on the server and returns a Job handle. Use - ``wait_and_download`` to poll for completion and get the download URL. + Initiates an export on the server and returns a Job handle. Use + ``wait_and_download`` to poll for completion and download the files. - If no start_time/stop_time are provided, the full time range of each run is used. - If no channels or calculated_channels are provided, all channels from - the run's assets are included. + There are three ways to scope the export, determined by which arguments + are provided: - Args: - runs: One or more Run objects or run IDs to export data from. - output_format: The file format for the export (CSV or Sun/WinPlot). - start_time: Optional start time to narrow the export within the run(s). - stop_time: Optional stop time to narrow the export within the run(s). 
- channels: Optional list of Channel objects or channel IDs to include. If omitted, all channels are exported. - calculated_channels: Optional calculated channels to include in the export. Accepts existing CalculatedChannel objects or CalculatedChannelCreate definitions. - simplify_channel_names: Remove text preceding last period in channel names, only if the resulting simplified name is unique. - combine_runs: Identical channels within the same asset across multiple runs will be combined into a single column. - split_export_by_asset: Split each asset into a separate file, with asset name removed from channel name display. - split_export_by_run: Split each run into a separate file, with run name removed from channel name display. + 1. **By runs** — provide ``runs``. The ``start_time``/``stop_time`` are + optional (if omitted, the full time range of each run is used). If no + ``channels`` or ``calculated_channels`` are provided, all channels + from the runs' assets are included. - Returns: - A Job handle for the pending export. - """ - ... - - def export_by_time_range( - self, - *, - start_time: datetime, - stop_time: datetime, - output_format: ExportOutputFormat, - channels: list[str | Channel] | None = None, - calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, - simplify_channel_names: bool = False, - combine_runs: bool = False, - split_export_by_asset: bool = False, - split_export_by_run: bool = False, - ) -> Job: - """Export data within a time range. + 2. **By assets** — provide ``assets``. Both ``start_time`` and + ``stop_time`` are **required**. If no ``channels`` or + ``calculated_channels`` are provided, all channels from the assets + are included. - Initiates the export on the server and returns a Job handle. Use - ``wait_and_download`` to poll for completion and get the download URL. + 3. **By time range only** — provide ``start_time`` and ``stop_time`` + without ``runs`` or ``assets``. 
At least one of ``channels`` or + ``calculated_channels`` **must** be provided to scope the data. - Both start_time and stop_time are required. At least one of channels or - calculated_channels **must** be provided to scope the data, since there - are no runs or assets to infer channels from. + You cannot provide both ``runs`` and ``assets`` at the same time. Args: - start_time: Start of the time range to export. - stop_time: End of the time range to export. - output_format: The file format for the export (CSV, Parquet, or Sun/WinPlot). - channels: List of Channel objects or channel IDs to include in the export. - calculated_channels: Calculated channels to include in the export. Accepts existing CalculatedChannel objects or CalculatedChannelCreate definitions. - simplify_channel_names: Remove text preceding last period in channel names, only if the resulting simplified name is unique. - combine_runs: Identical channels within the same asset across multiple runs will be combined into a single column. - split_export_by_asset: Split each asset into a separate file, with asset name removed from channel name display. - split_export_by_run: Split each run into a separate file, with run name removed from channel name display. + output_format: The file format for the export (CSV or Sun/WinPlot). + runs: One or more Run objects or run IDs to export data from. + assets: One or more Asset objects or asset IDs to export data from. + start_time: Start of the time range to export. Required when using + assets or time-range-only mode; optional when using runs. + stop_time: End of the time range to export. Required when using + assets or time-range-only mode; optional when using runs. + channels: Channel objects or channel IDs to include. If omitted and + runs or assets are provided, all channels are exported. Required + (along with ``calculated_channels``) in time-range-only mode. + calculated_channels: Calculated channels to include in the export. 
+ Accepts existing CalculatedChannel objects or + CalculatedChannelCreate definitions. + simplify_channel_names: Remove text preceding last period in channel + names, only if the resulting simplified name is unique. + combine_runs: Identical channels within the same asset across + multiple runs will be combined into a single column. + split_export_by_asset: Split each asset into a separate file, with + asset name removed from channel name display. + split_export_by_run: Split each run into a separate file, with run + name removed from channel name display. Returns: A Job handle for the pending export. - - Raises: - ValueError: If neither channels nor calculated_channels is provided. """ ... From 35fd1723775df41b90510ef6ebbdb7bbbc97b711 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Thu, 19 Mar 2026 13:10:41 -0700 Subject: [PATCH 40/53] mypy fix --- python/lib/sift_client/_internal/util/channels.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/lib/sift_client/_internal/util/channels.py b/python/lib/sift_client/_internal/util/channels.py index a3054f317..8c3d39d82 100644 --- a/python/lib/sift_client/_internal/util/channels.py +++ b/python/lib/sift_client/_internal/util/channels.py @@ -12,7 +12,7 @@ async def resolve_calculated_channels( calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None, channels_api: ChannelsAPIAsync, -) -> list[CalculatedChannelCreate] | None: +) -> list[CalculatedChannel | CalculatedChannelCreate] | None: """Resolve channel reference identifiers from names to UUIDs. For each channel reference, looks up the identifier as a channel name. @@ -20,7 +20,7 @@ async def resolve_calculated_channels( the identifier is already a UUID and keeps it as-is. 
""" if not calculated_channels: - return calculated_channels + return None resolved: list[CalculatedChannel | CalculatedChannelCreate] = [] for cc in calculated_channels: From 75f0d1d8a49c6a72f8032d8a539a8aba76bc1542 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Thu, 19 Mar 2026 13:20:08 -0700 Subject: [PATCH 41/53] python(refactor): use rest_client instead of raw request --- python/lib/sift_client/_internal/util/download.py | 15 +++++++++------ python/lib/sift_client/resources/exports.py | 5 ++++- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/python/lib/sift_client/_internal/util/download.py b/python/lib/sift_client/_internal/util/download.py index fce09f489..401357232 100644 --- a/python/lib/sift_client/_internal/util/download.py +++ b/python/lib/sift_client/_internal/util/download.py @@ -3,25 +3,27 @@ import zipfile from typing import TYPE_CHECKING -import requests - if TYPE_CHECKING: from pathlib import Path + from sift_client.transport.rest_transport import RestClient + def download_and_extract_zip( - url: str, zip_path: Path, output_dir: Path, *, extract: bool = True + url: str, zip_path: Path, output_dir: Path, *, rest_client: RestClient, extract: bool = True ) -> list[Path]: """Download a zip file from a URL and optionally extract its contents. - Downloads the file in streaming 4 MiB chunks. If extract is True, - extracts all contents to the output directory and removes the zip file. + Downloads the file in streaming 4 MiB chunks using the SDK's rest client. + If extract is True, extracts all contents to the output directory and + removes the zip file. Args: url: The URL to download the zip file from. zip_path: Path where the zip file will be saved. output_dir: Directory to extract the zip contents into. Created if it doesn't exist. + rest_client: The SDK rest client to use for the download. extract: If True (default), extract the zip and delete it. If False, keep the zip file as-is. 
@@ -34,7 +36,8 @@ def download_and_extract_zip( zipfile.BadZipFile: If the downloaded file is not a valid zip. """ output_dir.mkdir(parents=True, exist_ok=True) - with requests.get(url=url, stream=True) as response: + # Strip the session's default Authorization header, presigned URLs carry their own auth + with rest_client.get(url, stream=True, headers={"Authorization": None}) as response: response.raise_for_status() with zip_path.open("wb") as file: for chunk in response.iter_content(chunk_size=4194304): # 4 MiB diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index 46b0c0473..02968ee80 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -194,10 +194,13 @@ async def wait_and_download( zip_path = output_dir / f"{job_id}.zip" # Run the synchronous download in a thread pool to avoid blocking the event loop + rest_client = self.client.rest_client loop = asyncio.get_running_loop() extracted_files = await loop.run_in_executor( None, - lambda: download_and_extract_zip(presigned_url, zip_path, output_dir, extract=extract), + lambda: download_and_extract_zip( + presigned_url, zip_path, output_dir, rest_client=rest_client, extract=extract + ), ) return extracted_files From 28d353cfe61bd2ba10cda352a290cdbf02594643 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Thu, 19 Mar 2026 13:33:43 -0700 Subject: [PATCH 42/53] python(refactor): added dict support for calc channels --- .../_tests/resources/test_exports.py | 23 +++++++++++++++++++ python/lib/sift_client/resources/exports.py | 15 ++++++++---- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index 455b1463f..99f109bf0 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -263,6 +263,29 @@ async def 
test_channel_objects_to_ids(self, exports_api): ] +class TestDictConversion: + @pytest.mark.asyncio + async def test_calculated_channel_dict_converted(self, exports_api): + await exports_api.export( + runs=["run-1"], + output_format=CSV, + calculated_channels=[ + { + "name": "calc", + "expression": "$1 + 1", + "expression_channel_references": [ + {"channel_reference": "$1", "channel_identifier": "ch-1"} + ], + } + ], + ) + cc = exports_api._low_level_client.export_data.call_args.kwargs["calculated_channels"] + assert cc is not None + assert len(cc) == 1 + assert isinstance(cc[0], CalculatedChannelCreate) + assert cc[0].name == "calc" + + class TestExportValidation: @pytest.mark.asyncio async def test_runs_and_assets_raises(self, exports_api): diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index 02968ee80..df27a497f 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -10,6 +10,7 @@ from sift_client._internal.util.download import download_and_extract_zip from sift_client.resources._base import ResourceBase from sift_client.sift_types.asset import Asset +from sift_client.sift_types.calculated_channel import CalculatedChannelCreate from sift_client.sift_types.channel import Channel from sift_client.sift_types.export import ExportOutputFormat # noqa: TC001 from sift_client.sift_types.job import Job @@ -19,7 +20,7 @@ from datetime import datetime from sift_client.client import SiftClient - from sift_client.sift_types.calculated_channel import CalculatedChannel, CalculatedChannelCreate + from sift_client.sift_types.calculated_channel import CalculatedChannel class ExportsAPIAsync(ResourceBase): @@ -43,7 +44,7 @@ async def export( start_time: datetime | None = None, stop_time: datetime | None = None, channels: list[str | Channel] | None = None, - calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, + calculated_channels: 
list[CalculatedChannel | CalculatedChannelCreate | dict] | None = None, simplify_channel_names: bool = False, combine_runs: bool = False, split_export_by_asset: bool = False, @@ -85,8 +86,9 @@ async def export( runs or assets are provided, all channels are exported. Required (along with ``calculated_channels``) in time-range-only mode. calculated_channels: Calculated channels to include in the export. - Accepts existing CalculatedChannel objects or - CalculatedChannelCreate definitions. + Accepts existing CalculatedChannel objects, + CalculatedChannelCreate definitions, or dictionaries that + will be converted to CalculatedChannelCreate via model_validate. simplify_channel_names: Remove text preceding last period in channel names, only if the resulting simplified name is unique. combine_runs: Identical channels within the same asset across @@ -111,6 +113,11 @@ async def export( channel_ids = ( [c._id_or_error if isinstance(c, Channel) else c for c in channels] if channels else [] ) + if calculated_channels: + calculated_channels = [ + CalculatedChannelCreate.model_validate(cc) if isinstance(cc, dict) else cc + for cc in calculated_channels + ] resolved_calc_channels = await resolve_calculated_channels( calculated_channels, channels_api=self.client.async_.channels, From c971119ce8b62fe6e43fb2fd2dd97d190d32cdb3 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Thu, 19 Mar 2026 13:44:52 -0700 Subject: [PATCH 43/53] python(refactor): class rename to DataExportsAPI --- .../_internal/low_level_wrappers/exports.py | 4 ++-- .../sift_client/_tests/resources/test_exports.py | 14 +++++++------- python/lib/sift_client/client.py | 12 ++++++------ python/lib/sift_client/resources/__init__.py | 8 ++++---- python/lib/sift_client/resources/exports.py | 4 ++-- .../sift_client/resources/sync_stubs/__init__.py | 6 +++--- .../sift_client/resources/sync_stubs/__init__.pyi | 13 +++++++------ python/lib/sift_client/util/util.py | 6 +++--- 8 files changed, 34 insertions(+), 33 deletions(-) 
diff --git a/python/lib/sift_client/_internal/low_level_wrappers/exports.py b/python/lib/sift_client/_internal/low_level_wrappers/exports.py index 326462bf1..63aa200cd 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/exports.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/exports.py @@ -60,9 +60,9 @@ def _build_calc_channel_configs( class ExportsLowLevelClient(LowLevelClientBase, WithGrpcClient): - """Low-level client for the ExportsAPI. + """Low-level client for the DataExportAPI. - This class provides a thin wrapper around the autogenerated gRPC bindings for the ExportsAPI. + This class provides a thin wrapper around the autogenerated gRPC bindings for the DataExportAPI. """ def __init__(self, grpc_client: GrpcClient): diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index 99f109bf0..b1f0e3091 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -13,8 +13,8 @@ if TYPE_CHECKING: from sift_client import SiftClient -from sift_client.resources import ExportsAPI -from sift_client.resources.exports import ExportsAPIAsync +from sift_client.resources import DataExportAPI +from sift_client.resources.exports import DataExportAPIAsync from sift_client.sift_types.asset import Asset from sift_client.sift_types.calculated_channel import ( CalculatedChannel, @@ -33,12 +33,12 @@ @pytest.fixture def exports_api_async(sift_client: SiftClient): - return sift_client.async_.exports + return sift_client.async_.data_export @pytest.fixture def exports_api_sync(sift_client: SiftClient): - return sift_client.exports + return sift_client.data_export @pytest.fixture @@ -63,7 +63,7 @@ def mock_job(): @pytest.fixture def exports_api(mock_client, mock_job): with patch("sift_client.resources.exports.ExportsLowLevelClient", autospec=True) as mock_ll: - api = ExportsAPIAsync(mock_client) + api = 
DataExportAPIAsync(mock_client) api._low_level_client = mock_ll.return_value api._low_level_client.export_data = AsyncMock(return_value="job-123") mock_client.async_.jobs.get = AsyncMock(return_value=mock_job) @@ -72,8 +72,8 @@ def exports_api(mock_client, mock_job): @pytest.mark.integration def test_client_binding(sift_client): - assert isinstance(sift_client.exports, ExportsAPI) - assert isinstance(sift_client.async_.exports, ExportsAPIAsync) + assert isinstance(sift_client.data_export, DataExportAPI) + assert isinstance(sift_client.async_.data_export, DataExportAPIAsync) @pytest.mark.integration diff --git a/python/lib/sift_client/client.py b/python/lib/sift_client/client.py index dcb07c175..ed7aeba9a 100644 --- a/python/lib/sift_client/client.py +++ b/python/lib/sift_client/client.py @@ -7,8 +7,8 @@ CalculatedChannelsAPIAsync, ChannelsAPI, ChannelsAPIAsync, - ExportsAPI, - ExportsAPIAsync, + DataExportAPI, + DataExportAPIAsync, FileAttachmentsAPI, FileAttachmentsAPIAsync, IngestionAPIAsync, @@ -107,8 +107,8 @@ class SiftClient( test_results: TestResultsAPI """Instance of the Test Results API for making synchronous requests.""" - exports: ExportsAPI - """Instance of the Exports API for making synchronous requests.""" + data_export: DataExportAPI + """Instance of the Data Export API for making synchronous requests.""" async_: AsyncAPIs """Accessor for the asynchronous APIs. 
All asynchronous APIs are available as attributes on this accessor.""" @@ -158,7 +158,7 @@ def __init__( self.runs = RunsAPI(self) self.tags = TagsAPI(self) self.test_results = TestResultsAPI(self) - self.exports = ExportsAPI(self) + self.data_export = DataExportAPI(self) # Accessor for the asynchronous APIs self.async_ = AsyncAPIs( @@ -174,7 +174,7 @@ def __init__( runs=RunsAPIAsync(self), tags=TagsAPIAsync(self), test_results=TestResultsAPIAsync(self), - exports=ExportsAPIAsync(self), + data_export=DataExportAPIAsync(self), ) @property diff --git a/python/lib/sift_client/resources/__init__.py b/python/lib/sift_client/resources/__init__.py index 011f6af84..78b3b4eba 100644 --- a/python/lib/sift_client/resources/__init__.py +++ b/python/lib/sift_client/resources/__init__.py @@ -162,7 +162,7 @@ async def main(): from sift_client.resources.runs import RunsAPIAsync from sift_client.resources.tags import TagsAPIAsync from sift_client.resources.test_results import TestResultsAPIAsync -from sift_client.resources.exports import ExportsAPIAsync +from sift_client.resources.exports import DataExportAPIAsync # ruff: noqa All imports needs to be imported before sync_stubs to avoid circular import from sift_client.resources.sync_stubs import ( @@ -177,7 +177,7 @@ async def main(): TagsAPI, TestResultsAPI, FileAttachmentsAPI, - ExportsAPI, + DataExportAPI, ) import sys @@ -213,6 +213,6 @@ async def main(): "TestResultsAPI", "TestResultsAPIAsync", "TracingConfig", - "ExportsAPI", - "ExportsAPIAsync", + "DataExportAPI", + "DataExportAPIAsync", ] diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index df27a497f..9a00a2f79 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -23,11 +23,11 @@ from sift_client.sift_types.calculated_channel import CalculatedChannel -class ExportsAPIAsync(ResourceBase): +class DataExportAPIAsync(ResourceBase): """High-level API for exporting 
data from Sift.""" def __init__(self, sift_client: SiftClient): - """Initialize the ExportsAPI. + """Initialize the DataExportAPI. Args: sift_client: The Sift client to use. diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.py b/python/lib/sift_client/resources/sync_stubs/__init__.py index 11cf22e6b..acd73755e 100644 --- a/python/lib/sift_client/resources/sync_stubs/__init__.py +++ b/python/lib/sift_client/resources/sync_stubs/__init__.py @@ -7,7 +7,7 @@ AssetsAPIAsync, CalculatedChannelsAPIAsync, ChannelsAPIAsync, - ExportsAPIAsync, + DataExportAPIAsync, FileAttachmentsAPIAsync, JobsAPIAsync, PingAPIAsync, @@ -29,13 +29,13 @@ ReportsAPI = generate_sync_api(ReportsAPIAsync, "ReportsAPI") TagsAPI = generate_sync_api(TagsAPIAsync, "TagsAPI") TestResultsAPI = generate_sync_api(TestResultsAPIAsync, "TestResultsAPI") -ExportsAPI = generate_sync_api(ExportsAPIAsync, "ExportsAPI") +DataExportAPI = generate_sync_api(DataExportAPIAsync, "DataExportAPI") __all__ = [ "AssetsAPI", "CalculatedChannelsAPI", "ChannelsAPI", - "ExportsAPI", + "DataExportAPI", "FileAttachmentsAPI", "JobsAPI", "PingAPI", diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi index 45857c284..6d95a578f 100644 --- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi +++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi @@ -538,14 +538,14 @@ class ChannelsAPI: """ ... -class ExportsAPI: - """Sync counterpart to `ExportsAPIAsync`. +class DataExportAPI: + """Sync counterpart to `DataExportAPIAsync`. High-level API for exporting data from Sift. """ def __init__(self, sift_client: SiftClient): - """Initialize the ExportsAPI. + """Initialize the DataExportAPI. Args: sift_client: The Sift client to use. 
@@ -562,7 +562,7 @@ class ExportsAPI: start_time: datetime | None = None, stop_time: datetime | None = None, channels: list[str | Channel] | None = None, - calculated_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = None, + calculated_channels: list[CalculatedChannel | CalculatedChannelCreate | dict] | None = None, simplify_channel_names: bool = False, combine_runs: bool = False, split_export_by_asset: bool = False, @@ -604,8 +604,9 @@ class ExportsAPI: runs or assets are provided, all channels are exported. Required (along with ``calculated_channels``) in time-range-only mode. calculated_channels: Calculated channels to include in the export. - Accepts existing CalculatedChannel objects or - CalculatedChannelCreate definitions. + Accepts existing CalculatedChannel objects, + CalculatedChannelCreate definitions, or dictionaries that + will be converted to CalculatedChannelCreate via model_validate. simplify_channel_names: Remove text preceding last period in channel names, only if the resulting simplified name is unique. 
combine_runs: Identical channels within the same asset across diff --git a/python/lib/sift_client/util/util.py b/python/lib/sift_client/util/util.py index bd1c741f3..e82a8ccfe 100644 --- a/python/lib/sift_client/util/util.py +++ b/python/lib/sift_client/util/util.py @@ -7,7 +7,7 @@ AssetsAPIAsync, CalculatedChannelsAPIAsync, ChannelsAPIAsync, - ExportsAPIAsync, + DataExportAPIAsync, FileAttachmentsAPIAsync, IngestionAPIAsync, JobsAPIAsync, @@ -59,8 +59,8 @@ class AsyncAPIs(NamedTuple): test_results: TestResultsAPIAsync """Instance of the Test Results API for making asynchronous requests.""" - exports: ExportsAPIAsync - """Instance of the Exports API for making asynchronous requests.""" + data_export: DataExportAPIAsync + """Instance of the Data Export API for making asynchronous requests.""" def count_non_none(*args: Any) -> int: From ab3576488cf187c0d0b91e0097ed9524ee9d5dbe Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Thu, 19 Mar 2026 13:53:21 -0700 Subject: [PATCH 44/53] mypy fix --- python/lib/sift_client/resources/exports.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index 9a00a2f79..23d5dd050 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -113,13 +113,16 @@ async def export( channel_ids = ( [c._id_or_error if isinstance(c, Channel) else c for c in channels] if channels else [] ) - if calculated_channels: - calculated_channels = [ + normalized_calc_channels: list[CalculatedChannel | CalculatedChannelCreate] | None = ( + [ CalculatedChannelCreate.model_validate(cc) if isinstance(cc, dict) else cc for cc in calculated_channels ] + if calculated_channels + else None + ) resolved_calc_channels = await resolve_calculated_channels( - calculated_channels, + normalized_calc_channels, channels_api=self.client.async_.channels, ) From f8c9caf3d407f2d29a549672181cc019fd5544a2 Mon Sep 17 
00:00:00 2001 From: Wei Qi Lu Date: Thu, 19 Mar 2026 14:06:18 -0700 Subject: [PATCH 45/53] pyright fix --- python/lib/sift_client/_tests/resources/test_exports.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index b1f0e3091..ec5a72e58 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -372,7 +372,9 @@ async def test_resolves_name_to_uuid(self): result = await resolve_calculated_channels([cc], channels_api=api) assert result is not None assert len(result) == 1 - assert result[0].expression_channel_references[0].channel_identifier == "resolved-uuid" + refs = result[0].expression_channel_references + assert refs is not None + assert refs[0].channel_identifier == "resolved-uuid" @pytest.mark.asyncio async def test_keeps_identifier_when_not_found(self): From 9e32d81a131974e987f1248137c7603a8060ee8f Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Thu, 19 Mar 2026 14:20:08 -0700 Subject: [PATCH 46/53] timeout increased --- python/lib/sift_client/_tests/resources/test_exports.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index ec5a72e58..cd6d21bc1 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -118,7 +118,7 @@ async def test_wait_and_download(self, exports_api_async, sift_client, tmp_path) assert runs, "No runs available" job = await exports_api_async.export(runs=[runs[0]], output_format=CSV) files = await exports_api_async.wait_and_download( - job=job, output_dir=tmp_path, timeout_secs=60 + job=job, output_dir=tmp_path, timeout_secs=300 ) assert len(files) > 0 assert all(f.exists() for f in files) From 
05a4ce12b1675da2435a96fb32701c3ed471e1b0 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Fri, 20 Mar 2026 12:52:39 -0700 Subject: [PATCH 47/53] python(refactor): split up the download_and_extract function and renamed the file to be more appropriate --- .../_internal/util/{download.py => file.py} | 49 ++++++++++--------- python/lib/sift_client/resources/exports.py | 16 +++--- 2 files changed, 35 insertions(+), 30 deletions(-) rename python/lib/sift_client/_internal/util/{download.py => file.py} (51%) diff --git a/python/lib/sift_client/_internal/util/download.py b/python/lib/sift_client/_internal/util/file.py similarity index 51% rename from python/lib/sift_client/_internal/util/download.py rename to python/lib/sift_client/_internal/util/file.py index 401357232..76ee0e8db 100644 --- a/python/lib/sift_client/_internal/util/download.py +++ b/python/lib/sift_client/_internal/util/file.py @@ -9,44 +9,49 @@ from sift_client.transport.rest_transport import RestClient -def download_and_extract_zip( - url: str, zip_path: Path, output_dir: Path, *, rest_client: RestClient, extract: bool = True -) -> list[Path]: - """Download a zip file from a URL and optionally extract its contents. - - Downloads the file in streaming 4 MiB chunks using the SDK's rest client. - If extract is True, extracts all contents to the output directory and - removes the zip file. +def download_file(url: str, dest: Path, *, rest_client: RestClient) -> Path: + """Download a file from a URL in streaming 4 MiB chunks. Args: - url: The URL to download the zip file from. - zip_path: Path where the zip file will be saved. - output_dir: Directory to extract the zip contents into. - Created if it doesn't exist. + url: The URL to download from. + dest: Path where the file will be saved. Parent directories are created if needed. rest_client: The SDK rest client to use for the download. - extract: If True (default), extract the zip and delete it. - If False, keep the zip file as-is. 
Returns: - List of paths to the extracted files (excludes directories), - or a single-element list containing the zip path if extract is False. + The path to the downloaded file. Raises: requests.HTTPError: If the download request fails. - zipfile.BadZipFile: If the downloaded file is not a valid zip. """ - output_dir.mkdir(parents=True, exist_ok=True) + dest.parent.mkdir(parents=True, exist_ok=True) # Strip the session's default Authorization header, presigned URLs carry their own auth with rest_client.get(url, stream=True, headers={"Authorization": None}) as response: response.raise_for_status() - with zip_path.open("wb") as file: + with dest.open("wb") as file: for chunk in response.iter_content(chunk_size=4194304): # 4 MiB if chunk: file.write(chunk) - if not extract: - return [zip_path] + return dest + + +def extract_zip(zip_path: Path, output_dir: Path, *, delete_zip: bool = True) -> list[Path]: + """Extract a zip file to a directory. + + Args: + zip_path: Path to the zip file. + output_dir: Directory to extract contents into. Created if it doesn't exist. + delete_zip: If True (default), delete the zip file after extraction. + + Returns: + List of paths to the extracted files (excludes directories). + + Raises: + zipfile.BadZipFile: If the file is not a valid zip. 
+ """ + output_dir.mkdir(parents=True, exist_ok=True) with zipfile.ZipFile(zip_path, "r") as zip_file: names = zip_file.namelist() zip_file.extractall(output_dir) - zip_path.unlink() + if delete_zip: + zip_path.unlink() return [output_dir / name for name in names if not name.endswith("/")] diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index 23d5dd050..a3b6d1a17 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -7,13 +7,13 @@ from sift_client._internal.low_level_wrappers.exports import ExportsLowLevelClient from sift_client._internal.util.channels import resolve_calculated_channels -from sift_client._internal.util.download import download_and_extract_zip +from sift_client._internal.util.file import download_file, extract_zip from sift_client.resources._base import ResourceBase from sift_client.sift_types.asset import Asset from sift_client.sift_types.calculated_channel import CalculatedChannelCreate from sift_client.sift_types.channel import Channel from sift_client.sift_types.export import ExportOutputFormat # noqa: TC001 -from sift_client.sift_types.job import Job +from sift_client.sift_types.job import Job from sift_client.sift_types.run import Run if TYPE_CHECKING: @@ -201,16 +201,16 @@ async def wait_and_download( if output_dir is not None else Path(tempfile.mkdtemp(prefix="sift_export_")) ) - zip_path = output_dir / f"{job_id}.zip" + zip_file_path = output_dir / f"{job_id}.zip" # Run the synchronous download in a thread pool to avoid blocking the event loop rest_client = self.client.rest_client loop = asyncio.get_running_loop() - extracted_files = await loop.run_in_executor( + await loop.run_in_executor( None, - lambda: download_and_extract_zip( - presigned_url, zip_path, output_dir, rest_client=rest_client, extract=extract - ), + lambda: download_file(presigned_url, zip_file_path, rest_client=rest_client), ) - return extracted_files + if not 
extract: + return [zip_file_path] + return extract_zip(zip_file_path, output_dir) From e8ab2901b6abda56ffb49b9254741ef36e8d9f69 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Fri, 20 Mar 2026 13:18:13 -0700 Subject: [PATCH 48/53] python(fix): scoped the integration export jobs to 10s --- .../_tests/resources/test_exports.py | 91 +++++++++++-------- python/lib/sift_client/resources/exports.py | 2 +- 2 files changed, 52 insertions(+), 41 deletions(-) diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index cd6d21bc1..dd3a233be 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -79,75 +79,83 @@ def test_client_binding(sift_client): @pytest.mark.integration class TestExportsIntegration: @pytest.mark.asyncio - async def test_export_by_run(self, exports_api_async, sift_client): - runs = await sift_client.async_.runs.list_(limit=1) - assert runs, "No runs available" - job = await exports_api_async.export(runs=[runs[0]], output_format=CSV) + async def test_export_by_run(self, exports_api_async, nostromo_run): + start = nostromo_run.start_time + job = await exports_api_async.export( + runs=[nostromo_run], + start_time=start, + stop_time=start + timedelta(seconds=10), + output_format=CSV, + ) assert isinstance(job, Job) assert job.id_ is not None @pytest.mark.asyncio - async def test_export_by_asset(self, exports_api_async, sift_client): - assets = await sift_client.async_.assets.list_(limit=1) - assert assets, "No assets available" - now = datetime.now(timezone.utc) + async def test_export_by_asset(self, exports_api_async, nostromo_asset, nostromo_run): + start = nostromo_run.start_time job = await exports_api_async.export( - assets=[assets[0]], - start_time=now - timedelta(hours=1), - stop_time=now, + assets=[nostromo_asset], + start_time=start, + stop_time=start + timedelta(seconds=10), output_format=CSV, ) assert 
isinstance(job, Job) @pytest.mark.asyncio - async def test_export_by_time_range(self, exports_api_async, sift_client): + async def test_export_by_time_range(self, exports_api_async, sift_client, nostromo_run): channels = await sift_client.async_.channels.list_(limit=1) assert channels, "No channels available" - now = datetime.now(timezone.utc) + start = nostromo_run.start_time job = await exports_api_async.export( - start_time=now - timedelta(hours=1), - stop_time=now, + start_time=start, + stop_time=start + timedelta(seconds=10), channels=[channels[0]], output_format=CSV, ) assert isinstance(job, Job) @pytest.mark.asyncio - async def test_wait_and_download(self, exports_api_async, sift_client, tmp_path): - runs = await sift_client.async_.runs.list_(limit=1) - assert runs, "No runs available" - job = await exports_api_async.export(runs=[runs[0]], output_format=CSV) + async def test_wait_and_download(self, exports_api_async, nostromo_run, tmp_path): + start = nostromo_run.start_time + job = await exports_api_async.export( + runs=[nostromo_run], + start_time=start, + stop_time=start + timedelta(seconds=10), + output_format=CSV, + ) files = await exports_api_async.wait_and_download( job=job, output_dir=tmp_path, timeout_secs=300 ) assert len(files) > 0 assert all(f.exists() for f in files) - def test_sync_export_by_run(self, exports_api_sync, sift_client): - runs = sift_client.runs.list_(limit=1) - assert runs, "No runs available" - job = exports_api_sync.export(runs=[runs[0]], output_format=CSV) + def test_sync_export_by_run(self, exports_api_sync, nostromo_run): + start = nostromo_run.start_time + job = exports_api_sync.export( + runs=[nostromo_run], + start_time=start, + stop_time=start + timedelta(seconds=10), + output_format=CSV, + ) assert isinstance(job, Job) - def test_sync_export_by_asset(self, exports_api_sync, sift_client): - assets = sift_client.assets.list_(limit=1) - assert assets, "No assets available" - now = datetime.now(timezone.utc) + def 
test_sync_export_by_asset(self, exports_api_sync, nostromo_asset, nostromo_run): + start = nostromo_run.start_time job = exports_api_sync.export( - assets=[assets[0]], - start_time=now - timedelta(hours=1), - stop_time=now, + assets=[nostromo_asset], + start_time=start, + stop_time=start + timedelta(seconds=10), output_format=CSV, ) assert isinstance(job, Job) - def test_sync_export_by_time_range(self, exports_api_sync, sift_client): + def test_sync_export_by_time_range(self, exports_api_sync, sift_client, nostromo_run): channels = sift_client.channels.list_(limit=1) assert channels, "No channels available" - now = datetime.now(timezone.utc) + start = nostromo_run.start_time job = exports_api_sync.export( - start_time=now - timedelta(hours=1), - stop_time=now, + start_time=start, + stop_time=start + timedelta(seconds=10), channels=[channels[0]], output_format=CSV, ) @@ -405,7 +413,7 @@ def download_setup(exports_api, mock_client, tmp_path): fake_file = tmp_path / "data.csv" fake_file.write_text("col1,col2\n1,2") mock_loop = MagicMock() - mock_loop.run_in_executor = AsyncMock(return_value=[fake_file]) + mock_loop.run_in_executor = AsyncMock(return_value=None) return { "api": exports_api, @@ -423,7 +431,8 @@ async def test_success(self, download_setup): job = MagicMock(spec=Job) job._id_or_error = "job-123" with patch("asyncio.get_running_loop", return_value=s["loop"]): - result = await s["api"].wait_and_download(job=job, output_dir=s["tmp_path"]) + with patch("sift_client.resources.exports.extract_zip", return_value=[s["fake_file"]]): + result = await s["api"].wait_and_download(job=job, output_dir=s["tmp_path"]) assert result == [s["fake_file"]] s["client"].async_.jobs.wait_until_complete.assert_awaited_once_with( job="job-123", polling_interval_secs=5, timeout_secs=None @@ -433,7 +442,8 @@ async def test_success(self, download_setup): async def test_job_id_string(self, download_setup): s = download_setup with patch("asyncio.get_running_loop", 
return_value=s["loop"]): - result = await s["api"].wait_and_download(job="job-456", output_dir=s["tmp_path"]) + with patch("sift_client.resources.exports.extract_zip", return_value=[s["fake_file"]]): + result = await s["api"].wait_and_download(job="job-456", output_dir=s["tmp_path"]) assert result == [s["fake_file"]] @pytest.mark.asyncio @@ -442,9 +452,10 @@ async def test_custom_polling_and_timeout(self, download_setup): job = MagicMock(spec=Job) job._id_or_error = "job-123" with patch("asyncio.get_running_loop", return_value=s["loop"]): - await s["api"].wait_and_download( - job=job, polling_interval_secs=1, timeout_secs=10, output_dir=s["tmp_path"] - ) + with patch("sift_client.resources.exports.extract_zip", return_value=[s["fake_file"]]): + await s["api"].wait_and_download( + job=job, polling_interval_secs=1, timeout_secs=10, output_dir=s["tmp_path"] + ) s["client"].async_.jobs.wait_until_complete.assert_awaited_once_with( job="job-123", polling_interval_secs=1, timeout_secs=10 ) diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index a3b6d1a17..4e90a7146 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -13,7 +13,7 @@ from sift_client.sift_types.calculated_channel import CalculatedChannelCreate from sift_client.sift_types.channel import Channel from sift_client.sift_types.export import ExportOutputFormat # noqa: TC001 -from sift_client.sift_types.job import Job +from sift_client.sift_types.job import Job from sift_client.sift_types.run import Run if TYPE_CHECKING: From 95df19269c5c40f220e4e53a33916fbb031200b8 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Fri, 20 Mar 2026 13:30:42 -0700 Subject: [PATCH 49/53] python(fix): add sync wrapper tests for run_in_executer loop scenarios --- .../_tests/_internal/test_sync_wrapper.py | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git 
a/python/lib/sift_client/_tests/_internal/test_sync_wrapper.py b/python/lib/sift_client/_tests/_internal/test_sync_wrapper.py index 86841f2b6..9e4973324 100644 --- a/python/lib/sift_client/_tests/_internal/test_sync_wrapper.py +++ b/python/lib/sift_client/_tests/_internal/test_sync_wrapper.py @@ -82,6 +82,13 @@ async def async_method_with_exception(self) -> None: await asyncio.sleep(0.01) raise ValueError("Test exception") + async def async_method_with_executor(self) -> str: + """Test async method that uses run_in_executor, like wait_and_download.""" + self._record_call("async_method_with_executor") + loop = asyncio.get_running_loop() + result = await loop.run_in_executor(None, lambda: "executor_result") + return result + async def async_method_with_complex_args( self, arg1: str, arg2: dict[str, Any] | None = None, *args, **kwargs ) -> dict[str, Any]: @@ -183,3 +190,44 @@ def test_complex_arguments(self, mock_resource_sync): assert result["args"] == ("extra_arg",) assert result["kwargs"] == {"keyword": "keyword_value"} assert mock_resource_sync._async_impl.get_call_count("async_method_with_complex_args") == 1 + + +class TestSyncWrapperEventLoopScenarios: + """Test sync wrapper with run_in_executor under different event loop scenarios.""" + + @pytest.fixture + def mock_resource_sync(self): + mock_client = MockClient() + MockResource = generate_sync_api(MockResourceAsync, "MockResource") # noqa: N806 + return MockResource(mock_client, value="testVal") + + def test_sync_no_event_loop(self, mock_resource_sync): + """Plain sync call with no active event loop in the calling thread.""" + result = mock_resource_sync.async_method_with_executor() + assert result == "executor_result" + + def test_with_user_event_loop(self, mock_resource_sync): + """User has their own event loop running in another thread.""" + user_loop = asyncio.new_event_loop() + user_thread = threading.Thread(target=user_loop.run_forever, daemon=True) + user_thread.start() + try: + result = 
mock_resource_sync.async_method_with_executor() + assert result == "executor_result" + finally: + user_loop.call_soon_threadsafe(user_loop.stop) + user_thread.join(timeout=1.0) + user_loop.close() + + def test_sync_from_async(self, mock_resource_sync): + """Sync API called from inside a running async function.""" + + async def caller(): + return mock_resource_sync.async_method_with_executor() + + loop = asyncio.new_event_loop() + try: + result = loop.run_until_complete(caller()) + assert result == "executor_result" + finally: + loop.close() From 45f32a85470ba3eb005f4e419a990a0108ac7793 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Fri, 20 Mar 2026 14:53:07 -0700 Subject: [PATCH 50/53] python(refactor): add a run_sync_function util for running blocking calls off the event loop --- python/lib/sift_client/_internal/util/executor.py | 10 ++++++++++ python/lib/sift_client/resources/exports.py | 8 +++----- 2 files changed, 13 insertions(+), 5 deletions(-) create mode 100644 python/lib/sift_client/_internal/util/executor.py diff --git a/python/lib/sift_client/_internal/util/executor.py b/python/lib/sift_client/_internal/util/executor.py new file mode 100644 index 000000000..87525cce0 --- /dev/null +++ b/python/lib/sift_client/_internal/util/executor.py @@ -0,0 +1,10 @@ +from __future__ import annotations + +import asyncio +from typing import Any, Callable + + +async def run_sync_function(fn: Callable[..., Any], *args: Any) -> Any: + """Run a synchronous function in a thread pool to avoid blocking the event loop.""" + loop = asyncio.get_running_loop() + return await loop.run_in_executor(None, fn, *args) diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index 4e90a7146..4cc8eebf4 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -1,12 +1,12 @@ from __future__ import annotations -import asyncio import tempfile from pathlib import Path from typing import 
TYPE_CHECKING from sift_client._internal.low_level_wrappers.exports import ExportsLowLevelClient from sift_client._internal.util.channels import resolve_calculated_channels +from sift_client._internal.util.executor import run_sync_function from sift_client._internal.util.file import download_file, extract_zip from sift_client.resources._base import ResourceBase from sift_client.sift_types.asset import Asset @@ -205,10 +205,8 @@ async def wait_and_download( # Run the synchronous download in a thread pool to avoid blocking the event loop rest_client = self.client.rest_client - loop = asyncio.get_running_loop() - await loop.run_in_executor( - None, - lambda: download_file(presigned_url, zip_file_path, rest_client=rest_client), + await run_sync_function( + lambda: download_file(presigned_url, zip_file_path, rest_client=rest_client) ) if not extract: From af904bb0cb7cc4ee88c2577488eb784fd11e1ed7 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Fri, 20 Mar 2026 15:45:28 -0700 Subject: [PATCH 51/53] python(refactor): move wait_and_download to JobsAPI and add the method on the job object itself --- .../_tests/resources/test_exports.py | 36 +++++---- python/lib/sift_client/resources/exports.py | 79 +------------------ python/lib/sift_client/resources/jobs.py | 77 +++++++++++++++++- .../resources/sync_stubs/__init__.pyi | 70 ++++++++-------- python/lib/sift_client/sift_types/job.py | 40 ++++++++++ 5 files changed, 172 insertions(+), 130 deletions(-) diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index dd3a233be..77f175056 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -15,6 +15,7 @@ from sift_client import SiftClient from sift_client.resources import DataExportAPI from sift_client.resources.exports import DataExportAPIAsync +from sift_client.resources.jobs import JobsAPIAsync from sift_client.sift_types.asset 
import Asset from sift_client.sift_types.calculated_channel import ( CalculatedChannel, @@ -123,9 +124,7 @@ async def test_wait_and_download(self, exports_api_async, nostromo_run, tmp_path stop_time=start + timedelta(seconds=10), output_format=CSV, ) - files = await exports_api_async.wait_and_download( - job=job, output_dir=tmp_path, timeout_secs=300 - ) + files = job.wait_and_download(output_dir=tmp_path, timeout_secs=300) assert len(files) > 0 assert all(f.exists() for f in files) @@ -402,11 +401,15 @@ async def test_keeps_identifier_when_not_found(self): @pytest.fixture -def download_setup(exports_api, mock_client, tmp_path): +def download_setup(mock_client, tmp_path): completed_job = MagicMock(spec=Job) completed_job.job_status = JobStatus.FINISHED - mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=completed_job) - exports_api._low_level_client.get_download_url = AsyncMock( + + jobs_api = JobsAPIAsync(mock_client) + jobs_api.wait_until_complete = AsyncMock(return_value=completed_job) + mock_client.async_.data_export = MagicMock() + mock_client.async_.data_export._low_level_client = MagicMock() + mock_client.async_.data_export._low_level_client.get_download_url = AsyncMock( return_value="https://dl.test/export.zip" ) @@ -416,7 +419,7 @@ def download_setup(exports_api, mock_client, tmp_path): mock_loop.run_in_executor = AsyncMock(return_value=None) return { - "api": exports_api, + "api": jobs_api, "client": mock_client, "tmp_path": tmp_path, "fake_file": fake_file, @@ -431,10 +434,10 @@ async def test_success(self, download_setup): job = MagicMock(spec=Job) job._id_or_error = "job-123" with patch("asyncio.get_running_loop", return_value=s["loop"]): - with patch("sift_client.resources.exports.extract_zip", return_value=[s["fake_file"]]): + with patch("sift_client.resources.jobs.extract_zip", return_value=[s["fake_file"]]): result = await s["api"].wait_and_download(job=job, output_dir=s["tmp_path"]) assert result == [s["fake_file"]] - 
s["client"].async_.jobs.wait_until_complete.assert_awaited_once_with( + s["api"].wait_until_complete.assert_awaited_once_with( job="job-123", polling_interval_secs=5, timeout_secs=None ) @@ -442,7 +445,7 @@ async def test_success(self, download_setup): async def test_job_id_string(self, download_setup): s = download_setup with patch("asyncio.get_running_loop", return_value=s["loop"]): - with patch("sift_client.resources.exports.extract_zip", return_value=[s["fake_file"]]): + with patch("sift_client.resources.jobs.extract_zip", return_value=[s["fake_file"]]): result = await s["api"].wait_and_download(job="job-456", output_dir=s["tmp_path"]) assert result == [s["fake_file"]] @@ -452,11 +455,11 @@ async def test_custom_polling_and_timeout(self, download_setup): job = MagicMock(spec=Job) job._id_or_error = "job-123" with patch("asyncio.get_running_loop", return_value=s["loop"]): - with patch("sift_client.resources.exports.extract_zip", return_value=[s["fake_file"]]): + with patch("sift_client.resources.jobs.extract_zip", return_value=[s["fake_file"]]): await s["api"].wait_and_download( job=job, polling_interval_secs=1, timeout_secs=10, output_dir=s["tmp_path"] ) - s["client"].async_.jobs.wait_until_complete.assert_awaited_once_with( + s["api"].wait_until_complete.assert_awaited_once_with( job="job-123", polling_interval_secs=1, timeout_secs=10 ) @@ -473,12 +476,11 @@ async def test_custom_polling_and_timeout(self, download_setup): (JobStatus.CANCELLED, None, "cancelled"), ], ) - async def test_terminal_status_raises(self, exports_api, mock_client, status, details, match): - job = MagicMock(spec=Job) - job._id_or_error = "job-err" + async def test_terminal_status_raises(self, mock_client, status, details, match): + jobs_api = JobsAPIAsync(mock_client) completed = MagicMock(spec=Job) completed.job_status = status completed.job_status_details = details - mock_client.async_.jobs.wait_until_complete = AsyncMock(return_value=completed) + jobs_api.wait_until_complete = 
AsyncMock(return_value=completed) with pytest.raises(RuntimeError, match=match): - await exports_api.wait_and_download(job=job) + await jobs_api.wait_and_download(job="job-err") diff --git a/python/lib/sift_client/resources/exports.py b/python/lib/sift_client/resources/exports.py index 4cc8eebf4..ed8676960 100644 --- a/python/lib/sift_client/resources/exports.py +++ b/python/lib/sift_client/resources/exports.py @@ -1,19 +1,14 @@ from __future__ import annotations -import tempfile -from pathlib import Path from typing import TYPE_CHECKING from sift_client._internal.low_level_wrappers.exports import ExportsLowLevelClient from sift_client._internal.util.channels import resolve_calculated_channels -from sift_client._internal.util.executor import run_sync_function -from sift_client._internal.util.file import download_file, extract_zip from sift_client.resources._base import ResourceBase from sift_client.sift_types.asset import Asset from sift_client.sift_types.calculated_channel import CalculatedChannelCreate from sift_client.sift_types.channel import Channel from sift_client.sift_types.export import ExportOutputFormat # noqa: TC001 -from sift_client.sift_types.job import Job from sift_client.sift_types.run import Run if TYPE_CHECKING: @@ -21,6 +16,7 @@ from sift_client.client import SiftClient from sift_client.sift_types.calculated_channel import CalculatedChannel + from sift_client.sift_types.job import Job class DataExportAPIAsync(ResourceBase): @@ -53,7 +49,7 @@ async def export( """Export data from Sift. Initiates an export on the server and returns a Job handle. Use - ``wait_and_download`` to poll for completion and download the files. + ``job.wait_and_download()`` to poll for completion and download the files. 
There are three ways to scope the export, determined by which arguments are provided: @@ -141,74 +137,3 @@ async def export( ) return await self.client.async_.jobs.get(job_id=job_id) - - async def wait_and_download( - self, - *, - job: Job | str, - polling_interval_secs: int = 5, - timeout_secs: int | None = None, - output_dir: str | Path | None = None, - extract: bool = True, - ) -> list[Path]: - """Wait for an export job to complete and download the exported files. - - Polls the job status at the given interval until the job is FINISHED, - FAILED, or CANCELLED, then downloads and extracts the exported data files. - - Args: - job: The export Job or job ID to wait for. - polling_interval_secs: Seconds between status polls. Defaults to 5. - timeout_secs: Maximum seconds to wait. If None, polls indefinitely. - output_dir: Directory to save the extracted files. If omitted, a - temporary directory is created automatically. - extract: If True (default), extract the zip and delete it, - returning paths to the extracted files. If False, keep the - zip file and return its path. - - Returns: - List of paths to the extracted data files, or a single-element - list containing the zip path if extract is False. - - Raises: - RuntimeError: If the export job fails or is cancelled. - TimeoutError: If the export job does not complete within timeout_secs. - """ - from sift_client.sift_types.job import DataExportStatusDetails, JobStatus - - job_id = job._id_or_error if isinstance(job, Job) else job - - completed_job = await self.client.async_.jobs.wait_until_complete( - job=job_id, - polling_interval_secs=polling_interval_secs, - timeout_secs=timeout_secs, - ) - if completed_job.job_status == JobStatus.FAILED: - if ( - isinstance(completed_job.job_status_details, DataExportStatusDetails) - and completed_job.job_status_details.error_message - ): - raise RuntimeError( - f"Export job '{job_id}' failed. 
{completed_job.job_status_details.error_message}" - ) - raise RuntimeError(f"Export job '{job_id}' failed.") - if completed_job.job_status == JobStatus.CANCELLED: - raise RuntimeError(f"Export job '{job_id}' was cancelled.") - - presigned_url = await self._low_level_client.get_download_url(job_id=job_id) - output_dir = ( - Path(output_dir) - if output_dir is not None - else Path(tempfile.mkdtemp(prefix="sift_export_")) - ) - zip_file_path = output_dir / f"{job_id}.zip" - - # Run the synchronous download in a thread pool to avoid blocking the event loop - rest_client = self.client.rest_client - await run_sync_function( - lambda: download_file(presigned_url, zip_file_path, rest_client=rest_client) - ) - - if not extract: - return [zip_file_path] - return extract_zip(zip_file_path, output_dir) diff --git a/python/lib/sift_client/resources/jobs.py b/python/lib/sift_client/resources/jobs.py index c3f775389..5789212d6 100644 --- a/python/lib/sift_client/resources/jobs.py +++ b/python/lib/sift_client/resources/jobs.py @@ -1,12 +1,16 @@ from __future__ import annotations import asyncio +import tempfile import time +from pathlib import Path from typing import TYPE_CHECKING from sift_client._internal.low_level_wrappers.jobs import JobsLowLevelClient +from sift_client._internal.util.executor import run_sync_function +from sift_client._internal.util.file import download_file, extract_zip from sift_client.resources._base import ResourceBase -from sift_client.sift_types.job import Job, JobStatus, JobType +from sift_client.sift_types.job import DataExportStatusDetails, Job, JobStatus, JobType from sift_client.util import cel_utils as cel if TYPE_CHECKING: @@ -189,3 +193,74 @@ async def wait_until_complete( if timeout_secs is not None and (time.monotonic() - start) >= timeout_secs: raise TimeoutError(f"Job {job_id} did not complete within {timeout_secs} seconds") await asyncio.sleep(polling_interval_secs) + + async def wait_and_download( + self, + *, + job: Job | str, + 
polling_interval_secs: int = 5, + timeout_secs: int | None = None, + output_dir: str | Path | None = None, + extract: bool = True, + ) -> list[Path]: + """Wait for an export job to complete and download the exported files. + + Polls the job status at the given interval until the job is FINISHED, + FAILED, or CANCELLED, then downloads and extracts the exported data files. + + Args: + job: The export Job or job ID to wait for. + polling_interval_secs: Seconds between status polls. Defaults to 5. + timeout_secs: Maximum seconds to wait. If None, polls indefinitely. + output_dir: Directory to save the extracted files. If omitted, a + temporary directory is created automatically. + extract: If True (default), extract the zip and delete it, + returning paths to the extracted files. If False, keep the + zip file and return its path. + + Returns: + List of paths to the extracted data files, or a single-element + list containing the zip path if extract is False. + + Raises: + RuntimeError: If the export job fails or is cancelled. + TimeoutError: If the export job does not complete within timeout_secs. + """ + job_id = job._id_or_error if isinstance(job, Job) else job + + completed_job = await self.wait_until_complete( + job=job_id, + polling_interval_secs=polling_interval_secs, + timeout_secs=timeout_secs, + ) + if completed_job.job_status == JobStatus.FAILED: + if ( + isinstance(completed_job.job_status_details, DataExportStatusDetails) + and completed_job.job_status_details.error_message + ): + raise RuntimeError( + f"Export job '{job_id}' failed. 
{completed_job.job_status_details.error_message}" + ) + raise RuntimeError(f"Export job '{job_id}' failed.") + if completed_job.job_status == JobStatus.CANCELLED: + raise RuntimeError(f"Export job '{job_id}' was cancelled.") + + presigned_url = await self.client.async_.data_export._low_level_client.get_download_url( + job_id=job_id + ) + output_dir = ( + Path(output_dir) + if output_dir is not None + else Path(tempfile.mkdtemp(prefix="sift_export_")) + ) + zip_file_path = output_dir / f"{job_id}.zip" + + # Run the synchronous download in a thread pool to avoid blocking the event loop + rest_client = self.client.rest_client + await run_sync_function( + lambda: download_file(presigned_url, zip_file_path, rest_client=rest_client) + ) + + if not extract: + return [zip_file_path] + return extract_zip(zip_file_path, output_dir) diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi index 6d95a578f..f08a7fdf5 100644 --- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi +++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi @@ -571,7 +571,7 @@ class DataExportAPI: """Export data from Sift. Initiates an export on the server and returns a Job handle. Use - ``wait_and_download`` to poll for completion and download the files. + ``job.wait_and_download()`` to poll for completion and download the files. There are three ways to scope the export, determined by which arguments are provided: @@ -621,40 +621,6 @@ class DataExportAPI: """ ... - def wait_and_download( - self, - *, - job: Job | str, - polling_interval_secs: int = 5, - timeout_secs: int | None = None, - output_dir: str | Path | None = None, - extract: bool = True, - ) -> list[Path]: - """Wait for an export job to complete and download the exported files. - - Polls the job status at the given interval until the job is FINISHED, - FAILED, or CANCELLED, then downloads and extracts the exported data files. 
- - Args: - job: The export Job or job ID to wait for. - polling_interval_secs: Seconds between status polls. Defaults to 5. - timeout_secs: Maximum seconds to wait. If None, polls indefinitely. - output_dir: Directory to save the extracted files. If omitted, a - temporary directory is created automatically. - extract: If True (default), extract the zip and delete it, - returning paths to the extracted files. If False, keep the - zip file and return its path. - - Returns: - List of paths to the extracted data files, or a single-element - list containing the zip path if extract is False. - - Raises: - RuntimeError: If the export job fails or is cancelled. - TimeoutError: If the export job does not complete within timeout_secs. - """ - ... - class FileAttachmentsAPI: """Sync counterpart to `FileAttachmentsAPIAsync`. @@ -885,6 +851,40 @@ class JobsAPI: """ ... + def wait_and_download( + self, + *, + job: Job | str, + polling_interval_secs: int = 5, + timeout_secs: int | None = None, + output_dir: str | Path | None = None, + extract: bool = True, + ) -> list[Path]: + """Wait for an export job to complete and download the exported files. + + Polls the job status at the given interval until the job is FINISHED, + FAILED, or CANCELLED, then downloads and extracts the exported data files. + + Args: + job: The export Job or job ID to wait for. + polling_interval_secs: Seconds between status polls. Defaults to 5. + timeout_secs: Maximum seconds to wait. If None, polls indefinitely. + output_dir: Directory to save the extracted files. If omitted, a + temporary directory is created automatically. + extract: If True (default), extract the zip and delete it, + returning paths to the extracted files. If False, keep the + zip file and return its path. + + Returns: + List of paths to the extracted data files, or a single-element + list containing the zip path if extract is False. + + Raises: + RuntimeError: If the export job fails or is cancelled. 
+ TimeoutError: If the export job does not complete within timeout_secs. + """ + ... + def wait_until_complete( self, *, job: Job | str, polling_interval_secs: int = 5, timeout_secs: int | None = None ) -> Job: diff --git a/python/lib/sift_client/sift_types/job.py b/python/lib/sift_client/sift_types/job.py index 32b355763..97d68a1b5 100644 --- a/python/lib/sift_client/sift_types/job.py +++ b/python/lib/sift_client/sift_types/job.py @@ -16,6 +16,8 @@ from sift_client.sift_types._base import BaseType if TYPE_CHECKING: + from pathlib import Path + from sift_client.client import SiftClient @@ -312,3 +314,41 @@ def wait_until_complete( ) self._update(completed_job) return self + + def wait_and_download( + self, + *, + polling_interval_secs: int = 5, + timeout_secs: int | None = None, + output_dir: str | Path | None = None, + extract: bool = True, + ) -> list[Path]: + """Wait for an export job to complete and download the exported files. + + Polls the job status at the given interval until the job is FINISHED, + FAILED, or CANCELLED, then downloads and extracts the exported data files. + + Args: + polling_interval_secs: Seconds between status polls. Defaults to 5. + timeout_secs: Maximum seconds to wait. If None, polls indefinitely. + output_dir: Directory to save the extracted files. If omitted, a + temporary directory is created automatically. + extract: If True (default), extract the zip and delete it, + returning paths to the extracted files. If False, keep the + zip file and return its path. + + Returns: + List of paths to the extracted data files, or a single-element + list containing the zip path if extract is False. + + Raises: + RuntimeError: If the export job fails or is cancelled. + TimeoutError: If the export job does not complete within timeout_secs. 
+ """ + return self.client.jobs.wait_and_download( + job=self, + polling_interval_secs=polling_interval_secs, + timeout_secs=timeout_secs, + output_dir=output_dir, + extract=extract, + ) From 1cfc7ecfe4992009fee74230bb59afcd65bd1d9b Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Fri, 20 Mar 2026 17:00:07 -0700 Subject: [PATCH 52/53] python(fix): scoped the export_by_asset test to use one channel --- .../sift_client/_tests/resources/test_exports.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index 77f175056..149b3eacc 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -92,12 +92,17 @@ async def test_export_by_run(self, exports_api_async, nostromo_run): assert job.id_ is not None @pytest.mark.asyncio - async def test_export_by_asset(self, exports_api_async, nostromo_asset, nostromo_run): + async def test_export_by_asset( + self, exports_api_async, sift_client, nostromo_asset, nostromo_run + ): + channels = await sift_client.async_.channels.list_(limit=1) + assert channels, "No channels available" start = nostromo_run.start_time job = await exports_api_async.export( assets=[nostromo_asset], start_time=start, stop_time=start + timedelta(seconds=10), + channels=[channels[0]], output_format=CSV, ) assert isinstance(job, Job) @@ -138,12 +143,17 @@ def test_sync_export_by_run(self, exports_api_sync, nostromo_run): ) assert isinstance(job, Job) - def test_sync_export_by_asset(self, exports_api_sync, nostromo_asset, nostromo_run): + def test_sync_export_by_asset( + self, exports_api_sync, sift_client, nostromo_asset, nostromo_run + ): + channels = sift_client.channels.list_(limit=1) + assert channels, "No channels available" start = nostromo_run.start_time job = exports_api_sync.export( assets=[nostromo_asset], start_time=start, stop_time=start 
+ timedelta(seconds=10), + channels=[channels[0]], output_format=CSV, ) assert isinstance(job, Job) From 3924682e7261854add77a30e215c0f5ab8810cc3 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Fri, 20 Mar 2026 17:26:08 -0700 Subject: [PATCH 53/53] python(fix): updated export_by_asset integration tests with ingested test data --- .../_tests/resources/test_exports.py | 70 ++++++++++++++----- 1 file changed, 54 insertions(+), 16 deletions(-) diff --git a/python/lib/sift_client/_tests/resources/test_exports.py b/python/lib/sift_client/_tests/resources/test_exports.py index 149b3eacc..35224f5d4 100644 --- a/python/lib/sift_client/_tests/resources/test_exports.py +++ b/python/lib/sift_client/_tests/resources/test_exports.py @@ -2,11 +2,14 @@ from __future__ import annotations +import uuid from datetime import datetime, timedelta, timezone from typing import TYPE_CHECKING from unittest.mock import AsyncMock, MagicMock, patch +from urllib.parse import urljoin import pytest +import requests from sift_client._internal.low_level_wrappers.exports import _build_calc_channel_configs from sift_client._internal.util.channels import resolve_calculated_channels @@ -77,6 +80,49 @@ def test_client_binding(sift_client): assert isinstance(sift_client.async_.data_export, DataExportAPIAsync) +INGEST_TIMESTAMP = datetime(2025, 6, 1, tzinfo=timezone.utc) + + +@pytest.fixture(scope="session") +def ingested_export_channel(sift_client, nostromo_asset): + """Ingest a single data point into a unique channel on the nostromo asset for export tests.""" + import time + + channel_name = f"export-test-{uuid.uuid4().hex[:8]}" + rest_client = sift_client.rest_client + ingest_url = urljoin(rest_client.base_url, "api/v2/ingest") + api_key = rest_client._config.api_key + + payload = { + "asset_name": nostromo_asset.name, + "data": [ + { + "timestamp": INGEST_TIMESTAMP.isoformat(), + "values": [{"channel": channel_name, "value": 42}], + } + ], + } + resp = requests.post( + ingest_url, + 
headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}, + json=payload, + timeout=30, + ) + resp.raise_for_status() + + channel = None + for _ in range(20): + channel = sift_client.channels.find(name=channel_name, asset=nostromo_asset._id_or_error) + if channel is not None: + break + time.sleep(0.5) + assert channel is not None, f"Channel {channel_name} did not appear after ingest" + + yield channel + + sift_client.channels.archive([channel]) + + @pytest.mark.integration class TestExportsIntegration: @pytest.mark.asyncio @@ -93,16 +139,13 @@ async def test_export_by_run(self, exports_api_async, nostromo_run): @pytest.mark.asyncio async def test_export_by_asset( - self, exports_api_async, sift_client, nostromo_asset, nostromo_run + self, exports_api_async, nostromo_asset, ingested_export_channel ): - channels = await sift_client.async_.channels.list_(limit=1) - assert channels, "No channels available" - start = nostromo_run.start_time job = await exports_api_async.export( assets=[nostromo_asset], - start_time=start, - stop_time=start + timedelta(seconds=10), - channels=[channels[0]], + start_time=INGEST_TIMESTAMP - timedelta(seconds=1), + stop_time=INGEST_TIMESTAMP + timedelta(seconds=1), + channels=[ingested_export_channel], output_format=CSV, ) assert isinstance(job, Job) @@ -143,17 +186,12 @@ def test_sync_export_by_run(self, exports_api_sync, nostromo_run): ) assert isinstance(job, Job) - def test_sync_export_by_asset( - self, exports_api_sync, sift_client, nostromo_asset, nostromo_run - ): - channels = sift_client.channels.list_(limit=1) - assert channels, "No channels available" - start = nostromo_run.start_time + def test_sync_export_by_asset(self, exports_api_sync, nostromo_asset, ingested_export_channel): job = exports_api_sync.export( assets=[nostromo_asset], - start_time=start, - stop_time=start + timedelta(seconds=10), - channels=[channels[0]], + start_time=INGEST_TIMESTAMP - timedelta(seconds=1), + stop_time=INGEST_TIMESTAMP 
+ timedelta(seconds=1), + channels=[ingested_export_channel], output_format=CSV, ) assert isinstance(job, Job)