Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions libs/labelbox/src/labelbox/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,18 @@
)
from labelbox.schema.dataset import Dataset
from labelbox.schema.enums import AnnotationImportState
from labelbox.schema.project_sync import (
AutoQA,
AutoQaStatus,
CustomScore,
GranularRating,
ProjectSyncEntry,
ProjectSyncLabel,
ProjectSyncResult,
ProjectSyncReview,
ReviewedBy,
SubmittedBy,
)
from labelbox.schema.export_task import (
BufferedJsonConverterOutput,
ExportTask,
Expand Down
38 changes: 38 additions & 0 deletions libs/labelbox/src/labelbox/schema/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@
ProjectExportFilters,
build_filters,
)
from labelbox.schema.project_sync import (
ProjectSyncEntry,
ProjectSyncResult,
_to_gql_input,
)
from labelbox.schema.export_params import ProjectExportParams
from labelbox.schema.export_task import ExportTask
from labelbox.schema.identifiable import DataRowIdentifier
Expand Down Expand Up @@ -1001,6 +1006,39 @@ def create_batches(

return CreateBatchesTask(self.client, self.uid, batch_ids, task_ids)

def sync_external_project(
    self,
    entries: List[ProjectSyncEntry],
) -> ProjectSyncResult:
    """Submit external project data (labels, metrics, workflow state) for sync.

    The server processes the submission asynchronously; this call only
    hands the entries over and reports back the submission ID, which can
    be used to track the progress of the sync operation.

    Args:
        entries: The ProjectSyncEntry objects to send. Each one is
            converted to its GraphQL input form with ``_to_gql_input``.

    Returns:
        A ProjectSyncResult carrying the ``submissionId`` returned by the
        ``syncExternalProject`` mutation.
    """
    # Serialize every entry up front so the request variables are plain dicts.
    serialized_entries = list(map(_to_gql_input, entries))
    variables = {
        "input": {
            "projectId": self.uid,
            "entries": serialized_entries,
        }
    }

    query = """mutation syncExternalProjectPyApi($input: SyncExternalProjectInput!) {
syncExternalProject(input: $input) {
submissionId
}
}"""

    result = self.client.execute(query, variables)
    submission_id = result["syncExternalProject"]["submissionId"]
    return ProjectSyncResult(submission_id=submission_id)

def create_batches_from_dataset(
self,
name_prefix: str,
Expand Down
121 changes: 121 additions & 0 deletions libs/labelbox/src/labelbox/schema/project_sync.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
from enum import Enum
from typing import Any, Dict, List, Optional

from pydantic import BaseModel


class AutoQaStatus(str, Enum):
    """String-valued enumeration of the statuses an AutoQA result can carry.

    Each member's value equals its name; because the class also derives
    from ``str``, members compare equal to their plain string value.
    """

    Approve = "Approve"
    Reject = "Reject"
    Neutral = "Neutral"


class SubmittedBy(BaseModel):
    """Identifies, by email address, who submitted a label."""

    # Required. Declared as a plain string; no email-format validation here.
    email: str


class CustomScore(BaseModel):
    """A named numeric score attached to an AutoQA result or a review."""

    # Both fields are required.
    name: str
    value: float


class AutoQA(BaseModel):
    """AutoQA result attached to a synced label.

    Only ``status`` is required; the remaining fields default to ``None``.
    """

    status: AutoQaStatus
    score: Optional[float] = None
    feedback: Optional[str] = None
    custom_scores: Optional[List[CustomScore]] = None


class ProjectSyncLabel(BaseModel):
    """Label information for one synced entry.

    Only ``submitted_by`` is required; the remaining fields default to
    ``None``.
    """

    submitted_by: SubmittedBy
    auto_qa: Optional[AutoQA] = None
    seconds_to_completion: Optional[float] = None
    # Passed through as an opaque string; presumably a timestamp whose
    # format is defined by the API -- TODO confirm the expected format.
    submitted_on: Optional[str] = None


class ReviewedBy(BaseModel):
    """Identifies, by email address, who performed a review."""

    # Required. Declared as a plain string; no email-format validation here.
    email: str


class GranularRating(BaseModel):
    """An integer rating with an optional free-text comment."""

    # Required. No range is enforced by this model.
    score: int
    comment: Optional[str] = None


class ProjectSyncReview(BaseModel):
    """Review information for one synced entry.

    Only ``reviewed_by`` is required; ``rating`` and ``custom_scores``
    default to ``None``.
    """

    reviewed_by: ReviewedBy
    rating: Optional[GranularRating] = None
    custom_scores: Optional[List[CustomScore]] = None


class ProjectSyncEntry(BaseModel):
    """One unit of external project data to sync, keyed by ``task_id``.

    Only ``task_id`` is required; every other field defaults to ``None``.
    """

    # NOTE(review): no ``row_status`` field is declared. With pydantic's
    # default handling of unknown keyword arguments, a ``row_status=...``
    # passed by a caller would presumably be dropped without error --
    # confirm whether the field should be added here and serialized.
    task_id: str
    content_url: Optional[str] = None
    label: Optional[ProjectSyncLabel] = None
    review: Optional[ProjectSyncReview] = None
    # Free-form string; the set of accepted values is not constrained here.
    queue_type: Optional[str] = None
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing row_status field in ProjectSyncEntry model

High Severity

The ProjectSyncEntry model is missing a row_status field, even though the PR description's example usage shows row_status="PASS" being passed. Since Pydantic ignores extra fields by default, any row_status value provided by users will be silently dropped. The _to_gql_input function also lacks serialization logic for this field, so even if the model were fixed, the value would never reach the GraphQL API.

Additional Locations (1)

Fix in Cursor Fix in Web



class ProjectSyncResult(BaseModel):
    """Result of submitting a project sync request."""

    # Required. Identifier of the submission.
    submission_id: str


def _to_gql_input(entry: "ProjectSyncEntry") -> Dict[str, Any]:
    """Convert a ProjectSyncEntry to a camelCase dict matching the GQL schema.

    ``taskId`` is always emitted. Every other key is emitted only when the
    corresponding attribute is not ``None``, with two exceptions: ``label``
    and ``queue_type`` are emitted as an explicit ``None`` when the caller
    set them to ``None`` on purpose (detected through ``model_fields_set``).

    Args:
        entry: The sync entry to serialize.

    Returns:
        A dict with camelCase keys (``taskId``, ``contentUrl``, ``label``,
        ``review``, ``queueType``) holding only plain Python values.
    """
    result: Dict[str, Any] = {"taskId": entry.task_id}

    if entry.content_url is not None:
        result["contentUrl"] = entry.content_url

    if entry.label is not None:
        result["label"] = _label_to_gql(entry.label)
    elif "label" in entry.model_fields_set:
        # Explicitly passed label=None: forward the null instead of omitting.
        result["label"] = None

    # NOTE: unlike ``label`` and ``queue_type``, an explicitly-set
    # ``review=None`` is NOT forwarded as null; the key is simply omitted.
    # This asymmetry was raised in code review and confirmed as expected.
    if entry.review is not None:
        result["review"] = _review_to_gql(entry.review)

    if entry.queue_type is not None:
        result["queueType"] = entry.queue_type
    elif "queue_type" in entry.model_fields_set:
        # Explicitly passed queue_type=None: forward the null.
        result["queueType"] = None

    return result


def _custom_scores_to_gql(
    custom_scores: List["CustomScore"],
) -> List[Dict[str, Any]]:
    """Serialize custom scores to a list of ``{"name", "value"}`` dicts."""
    return [{"name": cs.name, "value": cs.value} for cs in custom_scores]


def _auto_qa_to_gql(auto_qa: "AutoQA") -> Dict[str, Any]:
    """Serialize an AutoQA result, omitting optional fields that are None."""
    # The enum is sent as its underlying string value.
    serialized: Dict[str, Any] = {"status": auto_qa.status.value}
    if auto_qa.score is not None:
        serialized["score"] = auto_qa.score
    if auto_qa.feedback is not None:
        serialized["feedback"] = auto_qa.feedback
    if auto_qa.custom_scores is not None:
        serialized["customScores"] = _custom_scores_to_gql(
            auto_qa.custom_scores
        )
    return serialized


def _label_to_gql(label: "ProjectSyncLabel") -> Dict[str, Any]:
    """Serialize a label, omitting optional fields that are None."""
    serialized: Dict[str, Any] = {
        "submittedBy": {"email": label.submitted_by.email},
    }
    if label.auto_qa is not None:
        serialized["autoQA"] = _auto_qa_to_gql(label.auto_qa)
    if label.seconds_to_completion is not None:
        serialized["secondsToCompletion"] = label.seconds_to_completion
    if label.submitted_on is not None:
        serialized["submittedOn"] = label.submitted_on
    return serialized


def _review_to_gql(review: "ProjectSyncReview") -> Dict[str, Any]:
    """Serialize a review, omitting optional fields that are None."""
    serialized: Dict[str, Any] = {
        "reviewedBy": {"email": review.reviewed_by.email},
    }
    if review.rating is not None:
        rating: Dict[str, Any] = {"score": review.rating.score}
        if review.rating.comment is not None:
            rating["comment"] = review.rating.comment
        serialized["rating"] = rating
    if review.custom_scores is not None:
        serialized["customScores"] = _custom_scores_to_gql(
            review.custom_scores
        )
    return serialized
Loading