From 1d337347739ac65bec540bec2814efe05b49a080 Mon Sep 17 00:00:00 2001 From: kostas-jakeliunas-sb Date: Mon, 23 Mar 2026 12:52:27 +0200 Subject: [PATCH] Add version and platform info to User-Agent header Requests to our API now send a descriptive User-Agent like "scrapingbee-cli/1.2.2 Python/3.14.0 (Darwin arm64)" instead of the bare "ScrapingBee/CLI". This lets us identify CLI traffic, distinguish versions, and debug platform-specific issues from incoming API logs. Both the aiohttp client (scrape, usage, SERP commands) and the Scrapy crawl path use the same shared helper. --- src/scrapingbee_cli/__init__.py | 13 +++++++++++++ src/scrapingbee_cli/client.py | 3 ++- src/scrapingbee_cli/crawl.py | 3 ++- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/scrapingbee_cli/__init__.py b/src/scrapingbee_cli/__init__.py index 2387160..a1b08eb 100644 --- a/src/scrapingbee_cli/__init__.py +++ b/src/scrapingbee_cli/__init__.py @@ -1,3 +1,16 @@ """ScrapingBee CLI - Command-line client for the ScrapingBee API.""" +import platform +import sys + __version__ = "1.2.2" + + +def user_agent() -> str: + """Build a descriptive User-Agent string for API requests. + + Format: scrapingbee-cli/1.2.2 Python/3.12.0 (Darwin arm64) + """ + py = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}" + os_info = f"{platform.system()} {platform.machine()}" + return f"scrapingbee-cli/{__version__} Python/{py} ({os_info})" diff --git a/src/scrapingbee_cli/client.py b/src/scrapingbee_cli/client.py index 617dd68..0c3a58c 100644 --- a/src/scrapingbee_cli/client.py +++ b/src/scrapingbee_cli/client.py @@ -10,6 +10,7 @@ import aiohttp import certifi +from . import user_agent from .config import BASE_URL @@ -45,7 +46,7 @@ async def __aenter__(self) -> Client: self._session = aiohttp.ClientSession( connector=connector, timeout=timeout, - headers={"User-Agent": "ScrapingBee/CLI"}, + headers={"User-Agent": user_agent()}, ) return self diff --git a/src/scrapingbee_cli/crawl.py b/src/scrapingbee_cli/crawl.py index 015b383..50fa1b2 100644 --- a/src/scrapingbee_cli/crawl.py +++ b/src/scrapingbee_cli/crawl.py @@ -20,6 +20,7 @@ from scrapy.utils.project import get_project_settings from scrapy_scrapingbee import ScrapingBeeRequest +from . import user_agent from .batch import _batch_subdir_for_extension, extension_for_crawl if TYPE_CHECKING: @@ -487,7 +488,7 @@ async def _fetch() -> bytes: ] or [loc.text.strip() for loc in root.findall(".//url/loc") if loc.text and loc.text.strip()] -USER_AGENT_CLI = "ScrapingBee/CLI" +USER_AGENT_CLI = user_agent() def default_crawl_output_dir() -> str: