path: root/sci-libs/datasets/files/datasets-2.16.0-tests.patch
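
Skip tests that require network access (Hugging Face Hub, GCP) or fetch
remote dataset scripts, so that the datasets-2.16.0 test suite can pass in
an offline, sandboxed build environment. (Deselecting these tests from the
ebuild, e.g. via EPYTEST_DESELECT, would be an alternative to patching the
test files.)
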
--- a/tests/test_arrow_dataset.py	2024-02-20 21:53:24.248470991 +0100
+++ b/tests/test_arrow_dataset.py	2024-02-20 21:53:29.441804737 +0100
@@ -3982,7 +3982,6 @@
     [
         "relative/path",
         "/absolute/path",
-        "s3://bucket/relative/path",
         "hdfs://relative/path",
         "hdfs:///absolute/path",
     ],
--- a/tests/test_load.py	2024-02-20 22:12:13.699209107 +0100
+++ b/tests/test_load.py	2024-02-20 22:13:10.862626708 +0100
@@ -386,6 +386,7 @@
             hf_modules_cache=self.hf_modules_cache,
         )
 
+    @pytest.mark.skip(reason="network")
     def test_HubDatasetModuleFactoryWithScript_dont_trust_remote_code(self):
         # "squad" has a dataset script
         factory = HubDatasetModuleFactoryWithScript(
@@ -402,6 +403,7 @@
         )
         self.assertRaises(ValueError, factory.get_module)
 
+    @pytest.mark.skip()
     def test_HubDatasetModuleFactoryWithScript_with_github_dataset(self):
         # "wmt_t2t" has additional imports (internal)
         factory = HubDatasetModuleFactoryWithScript(
@@ -411,6 +413,7 @@
         assert importlib.import_module(module_factory_result.module_path) is not None
         assert module_factory_result.builder_kwargs["base_path"].startswith(config.HF_ENDPOINT)
 
+    @pytest.mark.skip()
     def test_GithubMetricModuleFactory_with_internal_import(self):
         # "squad_v2" requires additional imports (internal)
         factory = GithubMetricModuleFactory(
@@ -419,6 +422,7 @@
         module_factory_result = factory.get_module()
         assert importlib.import_module(module_factory_result.module_path) is not None
 
+    @pytest.mark.skip()
     @pytest.mark.filterwarnings("ignore:GithubMetricModuleFactory is deprecated:FutureWarning")
     def test_GithubMetricModuleFactory_with_external_import(self):
         # "bleu" requires additional imports (external from github)
@@ -1032,6 +1036,7 @@
         datasets.load_dataset_builder(SAMPLE_DATASET_TWO_CONFIG_IN_METADATA, "non-existing-config")
 
 
+@pytest.mark.skip()
 @pytest.mark.parametrize("serializer", [pickle, dill])
 def test_load_dataset_builder_with_metadata_configs_pickable(serializer):
     builder = datasets.load_dataset_builder(SAMPLE_DATASET_SINGLE_CONFIG_IN_METADATA)
@@ -1153,6 +1158,7 @@
     assert len(builder.config.data_files["test"]) > 0
 
 
+@pytest.mark.skip()
 def test_load_dataset_builder_fail():
     with pytest.raises(DatasetNotFoundError):
         datasets.load_dataset_builder("blabla")
@@ -1168,6 +1174,7 @@
     assert isinstance(next(iter(dataset["train"])), dict)
 
 
+@pytest.mark.skip()
 def test_load_dataset_cached_local_script(dataset_loading_script_dir, data_dir, caplog):
     dataset = load_dataset(dataset_loading_script_dir, data_dir=data_dir)
     assert isinstance(dataset, DatasetDict)
--- a/tests/test_hf_gcp.py	2024-02-21 09:59:26.918397895 +0100
+++ b/tests/test_hf_gcp.py	2024-02-21 09:59:46.335100597 +0100
@@ -47,6 +47,7 @@
         ]
 
 
+@pytest.mark.skip("network")
 @parameterized.named_parameters(list_datasets_on_hf_gcp_parameters(with_config=True))
 class TestDatasetOnHfGcp(TestCase):
     dataset = None
--- a/tests/test_inspect.py	2024-02-21 10:03:32.315520016 +0100
+++ b/tests/test_inspect.py	2024-02-21 10:03:50.345553490 +0100
@@ -18,7 +18,7 @@
 pytestmark = pytest.mark.integration
 
 
-@pytest.mark.parametrize("path", ["paws", csv.__file__])
+@pytest.mark.parametrize("path", [csv.__file__])
 def test_inspect_dataset(path, tmp_path):
     inspect_dataset(path, tmp_path)
     script_name = Path(path).stem + ".py"
@@ -49,6 +49,7 @@
     assert list(info.splits.keys()) == expected_splits
 
 
+@pytest.mark.skip(reason="requires network")
 def test_get_dataset_config_info_private(hf_token, hf_private_dataset_repo_txt_data):
     info = get_dataset_config_info(hf_private_dataset_repo_txt_data, config_name="default", token=hf_token)
     assert list(info.splits.keys()) == ["train"]
--- a/tests/test_data_files.py	2024-02-21 20:22:57.536160356 +0100
+++ b/tests/test_data_files.py	2024-02-21 20:25:00.153052174 +0100
@@ -378,6 +378,7 @@
         assert len(hub_dataset_repo_patterns_results[pattern]) == 0
 
 
+@pytest.mark.skip(reason="network")
 def test_DataFilesList_from_patterns_locally_with_extra_files(complex_data_dir, text_file):
     data_files_list = DataFilesList.from_patterns([_TEST_URL, text_file.as_posix()], complex_data_dir)
     assert list(data_files_list) == [_TEST_URL, text_file.as_posix()]
@@ -467,6 +468,7 @@
         assert Hasher.hash(data_files1) != Hasher.hash(data_files2)
 
 
+@pytest.mark.skip(reason="network")
 def test_DataFilesDict_from_patterns_locally_or_remote_hashing(text_file):
     patterns = {"train": [_TEST_URL], "test": [str(text_file)]}
     data_files1 = DataFilesDict.from_patterns(patterns)
--- a/tests/packaged_modules/test_folder_based_builder.py	2024-02-21 21:30:20.718922523 +0100
+++ b/tests/packaged_modules/test_folder_based_builder.py	2024-02-21 21:31:46.309061287 +0100
@@ -382,6 +382,7 @@
         assert example[column] is not None
 
 
+@pytest.mark.skip(reason="network")
 @pytest.mark.parametrize("remote", [True, False])
 @pytest.mark.parametrize("drop_labels", [None, True, False])
 def test_data_files_with_different_levels_no_metadata(
@@ -405,6 +406,7 @@
         assert all(example.keys() == {"base", "label"} for _, example in generator)
 
 
+@pytest.mark.skip(reason="network")
 @pytest.mark.parametrize("remote", [False, True])
 @pytest.mark.parametrize("drop_labels", [None, True, False])
 def test_data_files_with_one_label_no_metadata(data_files_with_one_label_no_metadata, drop_labels, remote, cache_dir):