From 5d0d315ddd606753c7df1a9431948419d34d56d4 Mon Sep 17 00:00:00 2001 From: jayendra13 Date: Wed, 4 Feb 2026 23:26:37 +0530 Subject: [PATCH] fix(schema): skip datasource loading when --config-file is provided When a non-root user runs `cloud-init schema --config-file <file>`, the command was attempting to load the datasource from /var/lib/cloud/instance/obj.pkl, causing a confusing permission warning even though the datasource isn't needed for validating user-provided config files. Skip datasource loading when --config-file is provided since we're only validating the file's YAML syntax and schema, not system instance data. Fixes: #6592 --- cloudinit/config/schema.py | 36 ++++++----- tests/unittests/config/test_schema.py | 88 ++++++++++++++++++++------- 2 files changed, 87 insertions(+), 37 deletions(-) diff --git a/cloudinit/config/schema.py b/cloudinit/config/schema.py index b21c77f89fa..53a21e2d148 100644 --- a/cloudinit/config/schema.py +++ b/cloudinit/config/schema.py @@ -1309,22 +1309,28 @@ def get_processed_or_fallback_path( return raw_path return primary_datapath - try: - paths = read_cfg_paths(fetch_existing_datasource="trust") - except (IOError, OSError) as e: - if e.errno == EACCES: - LOG.debug( - "Using default instance-data/user-data paths for non-root user" - ) - paths = read_cfg_paths() - else: - raise - except DataSourceNotFoundException: + # When --config-file is provided, we don't need the datasource + # because we're validating a user-provided file, not system instance data + if args.config_file: paths = read_cfg_paths() - LOG.warning( - "datasource not detected, using default" - " instance-data/user-data paths." 
- ) + else: + try: + paths = read_cfg_paths(fetch_existing_datasource="trust") + except (IOError, OSError) as e: + if e.errno == EACCES: + LOG.debug( + "Using default instance-data/user-data paths for " + "non-root user" + ) + paths = read_cfg_paths() + else: + raise + except DataSourceNotFoundException: + paths = read_cfg_paths() + LOG.warning( + "datasource not detected, using default" + " instance-data/user-data paths." + ) if args.instance_data: instance_data_path = args.instance_data elif os.getuid() != 0: diff --git a/tests/unittests/config/test_schema.py b/tests/unittests/config/test_schema.py index 4892ffcdf5c..90c27fcf302 100644 --- a/tests/unittests/config/test_schema.py +++ b/tests/unittests/config/test_schema.py @@ -2020,6 +2020,48 @@ class TestHandleSchemaArgs: "Args", "config_file schema_type docs system annotate instance_data" ) + @mock.patch(M_PATH + "read_cfg_paths") + def test_handle_schema_args_config_file_skips_datasource_load( + self, + read_cfg_paths, + paths, + capsys, + caplog, + tmpdir, + ): + """When --config-file is provided, datasource loading is skipped. + + This ensures non-root users don't see pickle permission warnings + when validating a user-provided config file. 
+ """ + read_cfg_paths.return_value = paths + user_data_fn = tmpdir.join("user-data") + with open(user_data_fn, "w") as f: + f.write( + dedent( + """\ + #cloud-config + packages: [sl] + """ + ) + ) + args = self.Args( + config_file=str(user_data_fn), + schema_type="cloud-config", + annotate=False, + docs=None, + system=None, + instance_data=None, + ) + handle_schema_args("unused", args) + assert "Valid schema" in capsys.readouterr().out + # When config_file is provided, read_cfg_paths should be called + # once without fetch_existing_datasource + read_cfg_paths.assert_called_once_with() + # Ensure no warnings about pickle loading or datasource detection + assert "pickle" not in caplog.text.lower() + assert "datasource not detected" not in caplog.text + @pytest.mark.parametrize( "failure, expected_logs", ( @@ -2033,40 +2075,42 @@ class TestHandleSchemaArgs: ), ), ) + @mock.patch(M_PATH + "os.getuid", return_value=0) @mock.patch(M_PATH + "read_cfg_paths") - def test_handle_schema_unable_to_read_cfg_paths( + def test_handle_schema_system_falls_back_on_datasource_failure( self, read_cfg_paths, + m_getuid, failure, expected_logs, paths, capsys, caplog, - tmpdir, ): + """When --system is used and datasource fails, fallback works.""" if isinstance(failure, IOError): failure.errno = EACCES + paths.get_ipath = paths.get_ipath_cur read_cfg_paths.side_effect = [failure, paths] - user_data_fn = tmpdir.join("user-data") - with open(user_data_fn, "w") as f: - f.write( - dedent( - """\ - #cloud-config - packages: [sl] - """ - ) - ) + # Create the cloud_config file that --system reads + cloud_config_file = paths.get_ipath_cur("cloud_config") + write_file(cloud_config_file, b"#cloud-config\npackages: [sl]\n") args = self.Args( - config_file=str(user_data_fn), - schema_type="cloud-config", + config_file=None, + schema_type=None, annotate=False, docs=None, - system=None, + system=True, instance_data=None, ) handle_schema_args("unused", args) - assert "Valid schema" in 
capsys.readouterr().out + # First call should be with fetch_existing_datasource="trust" + # Second call (fallback) should be without it + assert read_cfg_paths.call_count == 2 + assert read_cfg_paths.call_args_list[0] == mock.call( + fetch_existing_datasource="trust" + ) + assert read_cfg_paths.call_args_list[1] == mock.call() for expected_log in expected_logs: assert expected_log in caplog.text @@ -2279,9 +2323,9 @@ def test_handle_schema_args_jinja_with_errors( expected_err.format(cfg_file=user_data_fn, id_path=id_path) == err ) assert "deprec" not in caplog.text - assert read_cfg_paths.call_args_list == [ - mock.call(fetch_existing_datasource="trust") - ] + # When config_file is provided, read_cfg_paths is called without + # fetch_existing_datasource to avoid unnecessary datasource loading + assert read_cfg_paths.call_args_list == [mock.call()] @pytest.mark.parametrize( "uid, annotate, expected_out, expected_err, expectation", @@ -2359,9 +2403,9 @@ def test_handle_schema_args_unknown_header( expected_err.format(cfg_file=user_data_fn, id_path=id_path) == err ) assert "deprec" not in caplog.text - assert read_cfg_paths.call_args_list == [ - mock.call(fetch_existing_datasource="trust") - ] + # When config_file is provided, read_cfg_paths is called without + # fetch_existing_datasource to avoid unnecessary datasource loading + assert read_cfg_paths.call_args_list == [mock.call()] class TestDeprecation: