diff --git a/lean/components/docker/lean_runner.py b/lean/components/docker/lean_runner.py index a2db4962..14597c2c 100644 --- a/lean/components/docker/lean_runner.py +++ b/lean/components/docker/lean_runner.py @@ -463,12 +463,14 @@ def _handle_data_providers(self, lean_config: Dict[str, Any], data_dir: Path): "map-file-provider", "QuantConnect.Data.Auxiliary.LocalZipMapFileProvider", "QuantConnect.Data.Auxiliary.LocalDiskMapFileProvider", - data_dir / "equity" / "usa" / "map_files") + data_dir, + "map_files") self._force_disk_provider_if_necessary(lean_config, "factor-file-provider", "QuantConnect.Data.Auxiliary.LocalZipFactorFileProvider", "QuantConnect.Data.Auxiliary.LocalDiskFactorFileProvider", - data_dir / "equity" / "usa" / "factor_files") + data_dir, + "factor_files") def set_up_python_options(self, project_dir: Path, run_options: Dict[str, Any], image: DockerImage) -> None: """Sets up Docker run options specific to Python projects. @@ -838,14 +840,24 @@ def _force_disk_provider_if_necessary(self, config_key: str, zip_provider: str, disk_provider: str, - zip_dir: Path) -> None: + data_dir: Path, + auxiliary_dir_name: str) -> None: """Updates the Lean config to use the disk provider instead of the zip one if there are no zips to use. + The map-file/factor-file provider is a single global engine setting that applies to every market. + The zip providers read per-market '{security_type}/{market}/{auxiliary_dir_name}/{auxiliary_dir_name}_yyyyMMdd.zip' + archives, while the disk providers can only read loose '.csv' files and silently ignore those zip + archives. We must therefore only downgrade to the disk provider when there is no recent zip to lose + for *any* market, not just 'equity/usa'. Otherwise a futures-only data folder (whose map files ship + only inside the zip) would have its zip provider swapped out and silently stop resolving, e.g. + continuous futures would never map (Mapped: None) with no error raised. 
+ :param lean_config: the Lean config to update :param config_key: the key of the configuration property :param zip_provider: the fully classified name of the zip provider for this property :param disk_provider: the fully classified name of the disk provider for this property - :param zip_dir: the directory where the zip provider looks for zip files + :param data_dir: the root data directory + :param auxiliary_dir_name: the auxiliary subdirectory the zip provider reads ("map_files"/"factor_files") """ from re import sub from datetime import datetime @@ -853,15 +865,25 @@ def _force_disk_provider_if_necessary(self, if lean_config.get(config_key, None) != zip_provider: return - if not zip_dir.exists(): - lean_config[config_key] = disk_provider - return - - zip_names = sorted([f.name for f in zip_dir.iterdir() if f.name.endswith(".zip")], reverse=True) - zip_names = [sub(r"[^\d]", "", name) for name in zip_names] - - if len(zip_names) == 0 or (datetime.now() - datetime.strptime(zip_names[0], "%Y%m%d")).days > 7: - lean_config[config_key] = disk_provider + # Keep the zip provider as long as any market has a recent map/factor file zip: the disk + # provider only reads loose csv, so downgrading would silently drop zip-shipped files (e.g. + # futures map files). We only need to know a recent zip exists, so we stop at the first one. 
+ now = datetime.now() + for zip_file in data_dir.glob(f"*/*/{auxiliary_dir_name}/*.zip"): + try: + zip_date = datetime.strptime(sub(r"[^\d]", "", zip_file.name), "%Y%m%d") + except ValueError: + continue + if (now - zip_date).days <= 7: + self._logger.debug( + f"LeanRunner._force_disk_provider_if_necessary(): found recent '{auxiliary_dir_name}' zip " + f"'{zip_file.name}', keeping '{zip_provider}' for '{config_key}'") + return + + self._logger.debug( + f"LeanRunner._force_disk_provider_if_necessary(): no '{auxiliary_dir_name}' zip newer than 7 days, " + f"using '{disk_provider}' for '{config_key}'") + lean_config[config_key] = disk_provider def setup_language_specific_run_options(self, run_options, project_dir, algorithm_file, set_up_common_csharp_options_called, release, image: DockerImage) -> None: diff --git a/tests/components/docker/test_lean_runner.py b/tests/components/docker/test_lean_runner.py index 6242ecac..6892b443 100644 --- a/tests/components/docker/test_lean_runner.py +++ b/tests/components/docker/test_lean_runner.py @@ -11,6 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from datetime import datetime from pathlib import Path from unittest import mock @@ -91,6 +92,70 @@ def create_lean_runner(docker_manager: mock.Mock) -> LeanRunner: xml_manager) +def test_handle_data_providers_keeps_zip_providers_for_futures_only_data() -> None: + # Regression: a futures-only data folder has fresh map/factor file zips under future/cme but no + # equity/usa data. The global zip providers must be kept; downgrading to the disk providers would + # silently break futures map-file resolution (continuous futures would never map, Mapped: None). 
+ lean_runner = create_lean_runner(mock.Mock()) + + data_dir = Path.cwd() / "data" + fresh = datetime.now().strftime("%Y%m%d") + for auxiliary_dir_name in ["map_files", "factor_files"]: + directory = data_dir / "future" / "cme" / auxiliary_dir_name + directory.mkdir(parents=True, exist_ok=True) + (directory / f"{auxiliary_dir_name}_{fresh}.zip").touch() + + lean_config = { + "data-provider": "QuantConnect.Lean.Engine.DataFeeds.DefaultDataProvider", + "map-file-provider": "QuantConnect.Data.Auxiliary.LocalZipMapFileProvider", + "factor-file-provider": "QuantConnect.Data.Auxiliary.LocalZipFactorFileProvider", + } + lean_runner._handle_data_providers(lean_config, data_dir) + + assert lean_config["map-file-provider"] == "QuantConnect.Data.Auxiliary.LocalZipMapFileProvider" + assert lean_config["factor-file-provider"] == "QuantConnect.Data.Auxiliary.LocalZipFactorFileProvider" + + +def test_handle_data_providers_downgrades_to_disk_providers_without_any_zip() -> None: + # When the data folder only has loose csv auxiliary files (e.g. the free sample data) and no zips + # for any market, fall back to the disk providers which read those loose files. 
+ lean_runner = create_lean_runner(mock.Mock()) + + data_dir = Path.cwd() / "data" + directory = data_dir / "equity" / "usa" / "map_files" + directory.mkdir(parents=True, exist_ok=True) + (directory / "spy.csv").touch() + + lean_config = { + "data-provider": "QuantConnect.Lean.Engine.DataFeeds.DefaultDataProvider", + "map-file-provider": "QuantConnect.Data.Auxiliary.LocalZipMapFileProvider", + "factor-file-provider": "QuantConnect.Data.Auxiliary.LocalZipFactorFileProvider", + } + lean_runner._handle_data_providers(lean_config, data_dir) + + assert lean_config["map-file-provider"] == "QuantConnect.Data.Auxiliary.LocalDiskMapFileProvider" + assert lean_config["factor-file-provider"] == "QuantConnect.Data.Auxiliary.LocalDiskFactorFileProvider" + + +def test_handle_data_providers_downgrades_to_disk_providers_when_zips_are_stale() -> None: + # If the newest zip for every market is older than the freshness window, fall back to disk. + lean_runner = create_lean_runner(mock.Mock()) + + data_dir = Path.cwd() / "data" + directory = data_dir / "future" / "cme" / "map_files" + directory.mkdir(parents=True, exist_ok=True) + (directory / "map_files_20200101.zip").touch() + + lean_config = { + "data-provider": "QuantConnect.Lean.Engine.DataFeeds.DefaultDataProvider", + "map-file-provider": "QuantConnect.Data.Auxiliary.LocalZipMapFileProvider", + "factor-file-provider": "QuantConnect.Data.Auxiliary.LocalZipFactorFileProvider", + } + lean_runner._handle_data_providers(lean_config, data_dir) + + assert lean_config["map-file-provider"] == "QuantConnect.Data.Auxiliary.LocalDiskMapFileProvider" + + @pytest.mark.parametrize("release", [False, True]) def test_run_lean_compiles_csharp_project_in_correct_configuration(release: bool) -> None: create_fake_lean_cli_directory()