class FileSystemStorageEngine(StorageEngine):
_metadata_file_suffix = ".metadata"
def __init__(self, base_path: Union[str, Path], storage_folder: str, **kwargs):
create_storage_folder = kwargs.get("create_storage_folder", True)
self.base_path = Path(base_path)
self._check_path(self.base_path)
self.storage_path = self.base_path / storage_folder
self.storage_folder = storage_folder
if create_storage_folder and not self.storage_path.is_dir():
self.storage_path.mkdir(parents=True, exist_ok=True)
self._check_path(self.storage_path)
self._kwargs = kwargs
def __repr__(self) -> str:
return f"{self.__class__.__name__}({self.storage_folder})"
def create_container(self, container_name: str, **kwargs) -> Optional[bool]:
container_path = self.storage_path / container_name
if self.exists(container_name=container_name):
return None
container_path.mkdir(parents=True, exist_ok=True)
return True
def remove_container(self, container_name: str, include_files: bool = True, **kwargs) -> Optional[bool]:
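        """
        Removes the container directory: recursively when `include_files` is True, otherwise only
        if it is empty. Returns True on success, None if the container does not exist, and False
        if removal fails with an OSError.
        """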
container_path = self.storage_path / container_name
if not self.exists(container_name=container_name):
log.debug(self._missing_text(container_name=container_name))
return None
try:
_rm: Any = shutil.rmtree if include_files else os.rmdir
_rm(container_path)
return True
except OSError as e:
log.debug(e)
return False
def exists(self, container_name: str, file_name: Optional[str] = None) -> bool:
container_path = self.storage_path / container_name
if file_name is None:
return container_path.exists() and container_path.is_dir()
file_path = container_path / file_name
return file_path.exists() and file_path.is_file()
def get_file_link(self, file_name: str, container_name: str, **kwargs) -> Optional[str]:
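        """
        Returns a link to the stored file, or None if the file does not exist or `mode` is not
        recognized. With `mode="file"` the link is a `file://`-prefixed filesystem path; with
        `mode="url"` it is a relative `/storage_folder/container/file` style path.
        """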
mode: str = kwargs.get("mode", "file") # Supported Modes: "url", "file"
if self.exists(container_name=container_name, file_name=file_name):
link = None
if mode.casefold() == "file":
file_path = self.storage_path / container_name / file_name
link = f"file://{file_path}"
elif mode.casefold() == "url":
link = f"/{self.storage_folder}/{container_name}/{file_name}"
return link
return None
def list_files(self, container_name: str, **kwargs) -> List[StorageFile]:
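        """
        Lists files in the given container, optionally filtered by `prefix`.
        The prefix may name a sub-directory, or a sub-directory plus a partial file name
        (e.g. a hypothetical `"2021/img_"` matches files under `2021/` whose names start
        with `img_`), mirroring Azure-style prefix matching.
        """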
prefix = kwargs.get("prefix")
if not self.exists(container_name=container_name):
log.debug(self._missing_text(container_name=container_name))
return []
container_path = self.storage_path / container_name
        # Mirror Azure blob semantics: the prefix may include a partial file name. First check whether
        # the container path joined with the prefix (if given) is a directory; if not, treat the last
        # path component of the prefix as a partial file name to match against.
if prefix:
container_path = container_path / prefix
if not container_path.is_dir():
if not prefix:
return []
            # Treat the provided path as a parent directory plus a partial file name to search for.
            # Return only those files in the parent directory whose names start with that partial name.
if not container_path.parent.is_dir():
return []
files_in_parent_dir = [f for f in container_path.parent.iterdir() if f.is_file()]
storage_files_in_parent_dir = self._get_storage_files(file_paths=files_in_parent_dir)
matching_files = [f for f in storage_files_in_parent_dir if f.name.startswith(container_path.name)]
return matching_files
# Prefix was not provided or is treated as a path
files_in_container = [f for f in container_path.iterdir() if f.is_file()]
storage_files_in_container = self._get_storage_files(files_in_container)
return storage_files_in_container
def _get_storage_files(self, file_paths: Sequence[Path]) -> List[StorageFile]:
storage_files = []
for file_path in file_paths:
            # Skip metadata sidecar files and dot-prefixed (hidden) files.
if file_path.name.endswith(self._metadata_file_suffix) or file_path.name.startswith("."):
continue
storage_file = self._path_to_storage_file(
file_path=file_path, include_content=False, include_metadata=True
)
if storage_file is None:
continue
storage_files.append(storage_file)
return storage_files
def add_file(
self, file: StorageFile, container_name: str, overwrite: bool = False, create_container: bool = True, **kwargs
) -> Optional[bool]:
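        """
        Writes `file` (and a metadata sidecar, if `file.metadata` is set) into the container,
        creating the container first when `create_container` is True. Returns True on success,
        None when the file already exists and `overwrite` is False, and False on a write error.
        """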
if create_container and not self.exists(container_name=container_name):
self.create_container(container_name=container_name)
if not overwrite and self.exists(container_name=container_name, file_name=file.name):
return None
file_path = self.storage_path / container_name / file.name
try:
if file.content:
file_path.write_bytes(data=file.content)
if file.metadata:
metadata_file_name = f"{file.name}{self._metadata_file_suffix}"
metadata_path = self.storage_path / container_name / metadata_file_name
clean_metadata = self._prepare_metadata_to_save(metadata=file.metadata)
metadata_content = json.dumps(clean_metadata, indent=1, sort_keys=True)
metadata_path.write_text(metadata_content)
return True
except (PermissionError, FileExistsError):
return False
def get_file(
self,
file_name: str,
container_name: str,
include_content: bool = True,
include_metadata: bool = True,
**kwargs,
) -> Optional[StorageFile]:
if not self.exists(container_name=container_name, file_name=file_name):
log.debug(self._missing_text(container_name=container_name, file_name=file_name))
return None
file_path = self.storage_path / container_name / file_name
storage_file = self._path_to_storage_file(
file_path=file_path, include_content=include_content, include_metadata=include_metadata
)
return storage_file
def delete_file(self, file_name: str, container_name: str, **kwargs) -> Optional[bool]:
if not self.exists(container_name=container_name, file_name=file_name):
log.debug(self._missing_text(container_name=container_name, file_name=file_name))
return None
file_path = self.storage_path / container_name / file_name
try:
os.remove(file_path)
return True
except PermissionError:
return False
def _path_to_storage_file(
self, file_path: Path, include_content: bool = False, include_metadata: bool = True
) -> Optional[StorageFile]:
if not file_path.is_file():
return None
content = file_path.read_bytes() if include_content else None
metadata = self._get_metadata_from_path(file_path=file_path) if include_metadata else {}
properties = self._get_properties_from_path(file_path=file_path)
content_type, _ = guess_type(file_path)
_file = StorageFile(
name=file_path.name,
properties=properties,
metadata=metadata,
content_type=content_type,
content=content,
)
return _file
@staticmethod
def _get_properties_from_path(file_path: Path) -> FileProperties:
file_stat = file_path.lstat()
properties = FileProperties(
last_modified=to_py_time(datetime.datetime.fromtimestamp(file_stat.st_mtime)),
size=file_stat.st_size,
etag=None,
)
return properties
@classmethod
    def _prepare_metadata_to_save(cls, metadata: Dict) -> Dict:
        """
        Converts the provided metadata into the format accepted by this storage engine.
        The following rules apply:
        Keys: non-empty ASCII strings, lower-cased, with spaces replaced by underscores
        Values: converted to strings
        :param metadata: dictionary with metadata to save.
        :return: sanitized dictionary with metadata.
        """
new_metadata = {}
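        # Illustrative example: a hypothetical {"Content Type": 123} becomes {"content_type": "123"}.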
for key, value in metadata.items():
try:
new_key = str(key).strip().casefold().replace(" ", "_")
new_value = str(value)
                # Skip keys that are empty or contain non-ASCII characters.
if not new_key or not new_key.isascii():
continue
new_metadata[new_key] = new_value
except (ValueError, TypeError):
continue
return new_metadata
@classmethod
def _get_metadata_from_path(cls, file_path: Path) -> Dict:
"""
        Reads metadata for the file stored at the provided `file_path`.
Args:
file_path: Path to stored file (not to its metadata)
Returns:
Sanitized dictionary with metadata.
"""
_metadata_filename = f"{file_path.name}{cls._metadata_file_suffix}"
metadata_path = file_path.parent / _metadata_filename
if not metadata_path.is_file():
return {}
        try:
            raw_metadata = json.loads(metadata_path.read_text())
        except (OSError, json.JSONDecodeError):
            return {}  # An unreadable or corrupt metadata file is treated as "no metadata".
        if not isinstance(raw_metadata, dict):
            return {}
new_metadata = {str(k): str(v) for k, v in raw_metadata.items()}
return new_metadata
@staticmethod
def _check_path(path: Path) -> None:
if not path.is_dir():
            raise IOError(f"Path ({path}) must be an existing directory.")
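

# Minimal usage sketch (illustrative only, not part of the engine). It assumes StorageFile can be
# constructed with just `name`, `content`, and `metadata` keyword arguments, as suggested by
# `_path_to_storage_file` above; adjust to the actual StorageFile signature if it differs.
if __name__ == "__main__":
    engine = FileSystemStorageEngine(base_path=".", storage_folder="storage")
    engine.create_container("demo")
    demo_file = StorageFile(name="hello.txt", content=b"hello world", metadata={"Created By": "example"})
    engine.add_file(file=demo_file, container_name="demo", overwrite=True)
    print([f.name for f in engine.list_files("demo")])            # e.g. ['hello.txt']
    print(engine.get_file_link("hello.txt", "demo", mode="url"))  # e.g. /storage/demo/hello.txt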