Skip to content

File storage on the local filesystem

Bases: StorageEngine

Source code in reportconnectors/file_storage/engine/file_system.py
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
class FileSystemStorageEngine(StorageEngine):
    """File storage on the local filesystem.

    Containers are directories located under ``base_path / storage_folder`` and files are regular
    files inside those directories. Metadata of a file is kept in a JSON sidecar file that lives
    next to it and is named ``<file name>.metadata``.
    """

    # Suffix appended to a file name to build the name of its metadata sidecar file.
    _metadata_file_suffix = ".metadata"

    def __init__(self, base_path: Union[str, Path], storage_folder: str, **kwargs):
        """Set up the engine and, unless disabled, create the storage folder.

        Args:
            base_path: Existing directory under which the storage folder lives.
            storage_folder: Name (or relative path) of the storage folder inside `base_path`.
            **kwargs: Extra options. `create_storage_folder` (default `True`) controls whether a
                missing storage folder is created. All kwargs are kept in `self._kwargs`.

        Raises:
            IOError: If `base_path` or the resulting storage path is not a directory.
        """
        create_storage_folder = kwargs.get("create_storage_folder", True)
        self.base_path = Path(base_path)
        self._check_path(self.base_path)
        self.storage_path = self.base_path / storage_folder
        self.storage_folder = storage_folder
        if create_storage_folder and not self.storage_path.is_dir():
            self.storage_path.mkdir(parents=True, exist_ok=True)
        self._check_path(self.storage_path)
        self._kwargs = kwargs

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self.storage_folder})"

    def create_container(self, container_name: str, **kwargs) -> Optional[bool]:
        """Create the container directory.

        Args:
            container_name: Name of the container (directory) to create.

        Returns:
            `True` when the directory was created, `None` when the container already exists.
        """
        container_path = self.storage_path / container_name
        if self.exists(container_name=container_name):
            return None
        container_path.mkdir(parents=True, exist_ok=True)
        return True

    def remove_container(self, container_name: str, include_files: bool = True, **kwargs) -> Optional[bool]:
        """Remove the container directory.

        Args:
            container_name: Name of the container (directory) to remove.
            include_files: When `True` the whole directory tree is removed (`shutil.rmtree`);
                otherwise only an empty directory can be removed (`os.rmdir`).

        Returns:
            `True` on success, `False` when the removal raised `OSError`, `None` when the
            container does not exist.
        """
        container_path = self.storage_path / container_name
        if not self.exists(container_name=container_name):
            log.debug(self._missing_text(container_name=container_name))
            return None

        try:
            _rm: Any = shutil.rmtree if include_files else os.rmdir
            _rm(container_path)
            return True
        except OSError as e:
            log.debug(e)
            return False

    def exists(self, container_name: str, file_name: Optional[str] = None) -> bool:
        """Check whether a container, or a file inside a container, exists.

        Args:
            container_name: Name of the container (directory).
            file_name: Optional file name. When omitted only the container is checked.

        Returns:
            `True` if the container is an existing directory (no `file_name`) or the file is an
            existing regular file (with `file_name`).
        """
        container_path = self.storage_path / container_name
        if file_name is None:
            return container_path.exists() and container_path.is_dir()
        file_path = container_path / file_name
        return file_path.exists() and file_path.is_file()

    def get_file_link(self, file_name: str, container_name: str, **kwargs) -> Optional[str]:
        """Build a link to a stored file.

        Args:
            file_name: Name of the file.
            container_name: Name of the container holding the file.
            **kwargs: `mode` selects the link flavour (case-insensitive, default `"file"`):
                `"file"` gives `file://<path on disk>`, `"url"` gives
                `/<storage_folder>/<container_name>/<file_name>`.

        Returns:
            The link, or `None` when the file does not exist or the mode is not supported.
        """
        mode: str = kwargs.get("mode", "file")  # Supported Modes: "url", "file"
        if self.exists(container_name=container_name, file_name=file_name):
            link = None
            if mode.casefold() == "file":
                file_path = self.storage_path / container_name / file_name
                # NOTE(review): the path is embedded verbatim (no percent-encoding, not made absolute).
                link = f"file://{file_path}"
            elif mode.casefold() == "url":
                link = f"/{self.storage_folder}/{container_name}/{file_name}"
            return link
        return None

    def list_files(self, container_name: str, **kwargs) -> List[StorageFile]:
        """List files stored directly in a container, optionally narrowed by a prefix.

        Args:
            container_name: Name of the container to list.
            **kwargs: `prefix` - either a sub-directory of the container, or a sub-path whose last
                component is the beginning of the file names to match.

        Returns:
            Storage files (without content, with metadata). Metadata sidecars and dot-prefixed
            files are not listed. An empty list is returned when nothing matches or the
            container does not exist.
        """
        prefix = kwargs.get("prefix")
        if not self.exists(container_name=container_name):
            log.debug(self._missing_text(container_name=container_name))
            return []

        container_path = self.storage_path / container_name

        # We need to handle the situation where the prefix includes a partial file name, in the same way as azure
        # does. We first check whether the container path with the prefix (if requested) is a directory - if not,
        # we treat the prefix as a path ending with a partial file name.
        if prefix:
            container_path = container_path / prefix

        if not container_path.is_dir():
            if not prefix:
                return []

            # At this point we treat the provided path as a path with a partial file name to search for.
            # Only files in the parent dir whose name starts with the partial file name are returned.
            if not container_path.parent.is_dir():
                return []
            files_in_parent_dir = [f for f in container_path.parent.iterdir() if f.is_file()]
            storage_files_in_parent_dir = self._get_storage_files(file_paths=files_in_parent_dir)

            matching_files = [f for f in storage_files_in_parent_dir if f.name.startswith(container_path.name)]
            return matching_files

        # Prefix was not provided or is treated as a path
        files_in_container = [f for f in container_path.iterdir() if f.is_file()]
        storage_files_in_container = self._get_storage_files(files_in_container)
        return storage_files_in_container

    def _get_storage_files(self, file_paths: Sequence[Path]) -> List[StorageFile]:
        """Convert paths to storage files, skipping metadata sidecars and dot-prefixed files."""
        storage_files = []
        for file_path in file_paths:
            # Skip metadata files or dot prefixed files
            if file_path.name.endswith(self._metadata_file_suffix) or file_path.name.startswith("."):
                continue
            storage_file = self._path_to_storage_file(
                file_path=file_path, include_content=False, include_metadata=True
            )
            if storage_file is None:
                continue
            storage_files.append(storage_file)
        return storage_files

    def add_file(
        self, file: StorageFile, container_name: str, overwrite: bool = False, create_container: bool = True, **kwargs
    ) -> Optional[bool]:
        """Store a file, and its metadata if any, in a container.

        Content is written only when `file.content` is truthy and the metadata sidecar is written
        only when `file.metadata` is truthy.

        Args:
            file: File to store.
            container_name: Name of the target container.
            overwrite: Whether an already existing file may be replaced.
            create_container: Whether a missing container should be created first.

        Returns:
            `True` on success, `False` on `PermissionError`/`FileExistsError`, `None` when the
            file already exists and `overwrite` is `False`.
        """
        if create_container and not self.exists(container_name=container_name):
            self.create_container(container_name=container_name)
        if not overwrite and self.exists(container_name=container_name, file_name=file.name):
            return None
        file_path = self.storage_path / container_name / file.name
        try:
            if file.content:
                file_path.write_bytes(data=file.content)
            if file.metadata:
                metadata_file_name = f"{file.name}{self._metadata_file_suffix}"
                metadata_path = self.storage_path / container_name / metadata_file_name
                clean_metadata = self._prepare_metadata_to_save(metadata=file.metadata)
                metadata_content = json.dumps(clean_metadata, indent=1, sort_keys=True)
                # json.dumps escapes non-ASCII by default, so the explicit encoding only pins
                # the on-disk format instead of relying on the platform default.
                metadata_path.write_text(metadata_content, encoding="utf-8")
            return True
        except (PermissionError, FileExistsError):
            return False

    def get_file(
        self,
        file_name: str,
        container_name: str,
        include_content: bool = True,
        include_metadata: bool = True,
        **kwargs,
    ) -> Optional[StorageFile]:
        """Read a stored file.

        Args:
            file_name: Name of the file.
            container_name: Name of the container holding the file.
            include_content: Whether the file bytes should be loaded.
            include_metadata: Whether the metadata sidecar should be loaded.

        Returns:
            The storage file, or `None` when it does not exist.
        """
        if not self.exists(container_name=container_name, file_name=file_name):
            log.debug(self._missing_text(container_name=container_name, file_name=file_name))
            return None

        file_path = self.storage_path / container_name / file_name
        storage_file = self._path_to_storage_file(
            file_path=file_path, include_content=include_content, include_metadata=include_metadata
        )
        return storage_file

    def delete_file(self, file_name: str, container_name: str, **kwargs) -> Optional[bool]:
        """Delete a stored file together with its metadata sidecar.

        Args:
            file_name: Name of the file.
            container_name: Name of the container holding the file.

        Returns:
            `True` when the file was deleted, `False` on `PermissionError`, `None` when the file
            does not exist.
        """
        if not self.exists(container_name=container_name, file_name=file_name):
            log.debug(self._missing_text(container_name=container_name, file_name=file_name))
            return None
        file_path = self.storage_path / container_name / file_name
        try:
            os.remove(file_path)
        except PermissionError:
            return False

        # Remove the sidecar as well, otherwise a file stored later under the same name would
        # inherit stale metadata. The file itself is already gone, so this is best-effort.
        metadata_path = self._metadata_path_for(file_path)
        if metadata_path.is_file():
            try:
                os.remove(metadata_path)
            except OSError as e:
                log.debug(e)
        return True

    def _path_to_storage_file(
        self, file_path: Path, include_content: bool = False, include_metadata: bool = True
    ) -> Optional[StorageFile]:
        """Build a storage file object from a path on disk.

        Args:
            file_path: Path to the stored file.
            include_content: Whether the file bytes should be read.
            include_metadata: Whether the metadata sidecar should be read.

        Returns:
            The storage file, or `None` when `file_path` is not a regular file.
        """
        if not file_path.is_file():
            return None

        content = file_path.read_bytes() if include_content else None
        metadata = self._get_metadata_from_path(file_path=file_path) if include_metadata else {}
        properties = self._get_properties_from_path(file_path=file_path)
        content_type, _ = guess_type(file_path)
        _file = StorageFile(
            name=file_path.name,
            properties=properties,
            metadata=metadata,
            content_type=content_type,
            content=content,
        )
        return _file

    @staticmethod
    def _get_properties_from_path(file_path: Path) -> FileProperties:
        """Collect modification time and size of the file under `file_path`."""
        # stat() follows symlinks, consistently with is_file()/read_bytes() used for the same
        # path, so the reported size matches the content that would be returned.
        file_stat = file_path.stat()
        properties = FileProperties(
            last_modified=to_py_time(datetime.datetime.fromtimestamp(file_stat.st_mtime)),
            size=file_stat.st_size,
            etag=None,
        )
        return properties

    @classmethod
    def _prepare_metadata_to_save(cls, metadata: Dict) -> Dict:
        """
        Converts provided metadata to the format stored by FileSystemStorageEngine.

        Keys are converted to strings, stripped, case-folded and have spaces replaced with
        underscores; entries whose key ends up empty or non-ASCII are dropped. Values are
        converted to strings.

        Args:
            metadata: Dictionary with metadata to save.

        Returns:
            Sanitized dictionary with metadata.
        """

        new_metadata = {}
        for key, value in metadata.items():
            try:
                new_key = str(key).strip().casefold().replace(" ", "_")
                new_value = str(value)
                # skip empty and non-ascii keys
                if not new_key or not new_key.isascii():
                    continue
                new_metadata[new_key] = new_value
            except (ValueError, TypeError):
                continue

        return new_metadata

    @classmethod
    def _metadata_path_for(cls, file_path: Path) -> Path:
        """Return the path of the metadata sidecar that belongs to `file_path`."""
        return file_path.parent / f"{file_path.name}{cls._metadata_file_suffix}"

    @classmethod
    def _get_metadata_from_path(cls, file_path: Path) -> Dict:
        """
        Reads metadata for file under the provided `file_path`.

        Args:
            file_path: Path to stored file (not to its metadata)

        Returns:
            Dictionary with string keys and values. Empty when there is no metadata file, when
            it cannot be read or parsed, or when it does not hold a JSON object.
        """

        metadata_path = cls._metadata_path_for(file_path)

        if not metadata_path.is_file():
            return {}
        try:
            raw_metadata = json.loads(metadata_path.read_text(encoding="utf-8"))
        except (OSError, ValueError) as e:
            # ValueError covers both json.JSONDecodeError and UnicodeDecodeError. A broken
            # sidecar must not make the file itself (or a whole listing) unreadable.
            log.debug(f"Ignoring unreadable metadata file ({metadata_path}): {e}")
            return {}
        if not isinstance(raw_metadata, dict):
            return {}
        new_metadata = {str(k): str(v) for k, v in raw_metadata.items()}
        return new_metadata

    @staticmethod
    def _check_path(path: Path) -> None:
        """Raise `IOError` unless `path` is an existing directory."""
        if not path.is_dir():
            raise IOError(f"Base path ({path}) should be a directory.")