blob: ab52e127b9b5d2de26fcb54f89c9687a7c780e41 [file] [log] [blame]
koder aka kdanilov22d134e2016-11-08 11:33:19 +02001"""
2This module contains interfaces for storage classes
3"""
4
koder aka kdanilov3d2bc4f2016-11-12 18:31:18 +02005import os
koder aka kdanilov108ac362017-01-19 20:17:16 +02006import re
koder aka kdanilov22d134e2016-11-08 11:33:19 +02007import abc
koder aka kdanilov23e6bdf2016-12-24 02:18:54 +02008import shutil
koder aka kdanilov7f59d562016-12-26 01:34:23 +02009import sqlite3
koder aka kdanilov108ac362017-01-19 20:17:16 +020010import logging
koder aka kdanilovffaf48d2016-12-27 02:25:29 +020011from typing import Any, TypeVar, Type, IO, Tuple, cast, List, Dict, Iterable, Iterator
koder aka kdanilov39e449e2016-12-17 15:15:26 +020012
13import yaml
14try:
15 from yaml import CLoader as Loader, CDumper as Dumper # type: ignore
16except ImportError:
17 from yaml import Loader, Dumper # type: ignore
koder aka kdanilova732a602017-02-01 20:29:56 +020018import numpy
koder aka kdanilov22d134e2016-11-08 11:33:19 +020019
koder aka kdanilov108ac362017-01-19 20:17:16 +020020from .common_types import IStorable
21
22
23logger = logging.getLogger("wally")
koder aka kdanilov22d134e2016-11-08 11:33:19 +020024
25
class ISimpleStorage(metaclass=abc.ABCMeta):
    """Abstract low-level storage.

    Implementations deal only with raw bytes addressed by string paths;
    no serialization happens at this level. Every method is abstract and
    must be overridden by a concrete backend.
    """

    @abc.abstractmethod
    def put(self, value: bytes, path: str) -> None:
        """Store the bytes ``value`` under ``path``."""

    @abc.abstractmethod
    def get(self, path: str) -> bytes:
        """Return the bytes stored under ``path``."""

    @abc.abstractmethod
    def rm(self, path: str) -> None:
        """Remove whatever is stored under ``path``."""

    @abc.abstractmethod
    def sync(self) -> None:
        """Flush pending state to the backing store."""

    @abc.abstractmethod
    def __contains__(self, path: str) -> bool:
        """Tell whether ``path`` exists in the storage."""

    @abc.abstractmethod
    def get_fd(self, path: str, mode: str = "rb+") -> IO:
        """Open ``path`` with the given ``mode`` and return the file object."""

    @abc.abstractmethod
    def sub_storage(self, path: str) -> 'ISimpleStorage':
        """Return a storage of the same kind rooted at ``path``."""

    @abc.abstractmethod
    def list(self, path: str) -> Iterator[Tuple[bool, str]]:
        """Iterate over ``(is_file, name)`` pairs for the entries under ``path``."""
61
koder aka kdanilov22d134e2016-11-08 11:33:19 +020062
class ITSStorage(metaclass=abc.ABCMeta):
    """Abstract low-level storage operating on raw bytes only.

    Declares the same set of abstract methods as ``ISimpleStorage``;
    no serialization happens at this level.
    """

    @abc.abstractmethod
    def put(self, value: bytes, path: str) -> None:
        """Store the bytes ``value`` under ``path``."""

    @abc.abstractmethod
    def get(self, path: str) -> bytes:
        """Return the bytes stored under ``path``."""

    @abc.abstractmethod
    def rm(self, path: str) -> None:
        """Remove whatever is stored under ``path``."""

    @abc.abstractmethod
    def sync(self) -> None:
        """Flush pending state to the backing store."""

    @abc.abstractmethod
    def __contains__(self, path: str) -> bool:
        """Tell whether ``path`` exists in the storage."""

    @abc.abstractmethod
    def get_fd(self, path: str, mode: str = "rb+") -> IO:
        """Open ``path`` with the given ``mode`` and return the file object."""

    # NOTE(review): the return annotation names ISimpleStorage rather than
    # ITSStorage; this looks like a copy/paste leftover - confirm the intent.
    @abc.abstractmethod
    def sub_storage(self, path: str) -> 'ISimpleStorage':
        """Return a storage rooted at ``path``."""

    @abc.abstractmethod
    def list(self, path: str) -> Iterator[Tuple[bool, str]]:
        """Iterate over ``(is_file, name)`` pairs for the entries under ``path``."""
98
99
class ISerializer(metaclass=abc.ABCMeta):
    """Abstract serializer: turns values into bytes and bytes back into values."""

    @abc.abstractmethod
    def pack(self, value: IStorable) -> bytes:
        """Encode ``value`` as bytes."""

    @abc.abstractmethod
    def unpack(self, data: bytes) -> Any:
        """Decode ``data`` back into a Python object."""
109
110
class FSStorage(ISimpleStorage):
    """Store all data in files on FS.

    Every ``path`` argument is interpreted relative to ``root_path``.
    """

    def __init__(self, root_path: str, existing: bool) -> None:
        self.root_path = root_path
        # Only stored and handed down to sub-storages; not consulted elsewhere
        # in this class.
        self.existing = existing
        # Entry names that list() never reports.
        self.ignored = {'.', '..'}

    def j(self, path: str) -> str:
        """Return ``path`` joined onto the storage root."""
        return os.path.join(self.root_path, path)

    @staticmethod
    def _make_parent_dir(jpath: str) -> None:
        """Create the directory that should contain ``jpath`` if it is missing."""
        parent = os.path.dirname(jpath)
        # dirname is empty for a bare file name; os.makedirs("") would raise.
        if parent:
            os.makedirs(parent, exist_ok=True)

    def put(self, value: bytes, path: str) -> None:
        """Write ``value`` to ``path``, creating parent directories as needed.

        An existing file at ``path`` is overwritten.
        """
        jpath = self.j(path)
        self._make_parent_dir(jpath)
        with open(jpath, "wb") as fd:
            fd.write(value)

    def get(self, path: str) -> bytes:
        """Return the content of the file at ``path``.

        Raises:
            KeyError: if the file does not exist.
        """
        try:
            with open(self.j(path), "rb") as fd:
                return fd.read()
        except FileNotFoundError as exc:
            raise KeyError(path) from exc

    def rm(self, path: str) -> None:
        """Remove the file or directory tree at ``path``; missing paths are ignored."""
        # Resolve against the storage root, like every other method does;
        # using the raw path would act relative to the process CWD.
        jpath = self.j(path)
        if os.path.isdir(jpath):
            shutil.rmtree(jpath, ignore_errors=True)
        elif os.path.exists(jpath):
            os.unlink(jpath)

    def __contains__(self, path: str) -> bool:
        """Tell whether ``path`` exists under the storage root."""
        return os.path.exists(self.j(path))

    def get_fd(self, path: str, mode: str = "rb+") -> IO[bytes]:
        """Open the file at ``path`` and return the file object.

        Besides the regular ``open`` modes two extra ones are understood:
        ``"cb"`` opens as ``"rb+"`` and ``"ct"`` opens as ``"rt+"``; in both
        cases parent directories are created and, if opening fails, the file
        is created by reopening it with ``"wb"`` / ``"wt"``.

        Raises:
            OSError: if the file can't be opened and ``mode`` is not one of
                the creating modes.
        """
        jpath = self.j(path)

        if "cb" == mode:
            create_on_fail = True
            mode = "rb+"
            self._make_parent_dir(jpath)
        elif "ct" == mode:
            create_on_fail = True
            mode = "rt+"
            self._make_parent_dir(jpath)
        else:
            create_on_fail = False

        try:
            fd = open(jpath, mode)
        except IOError:
            if not create_on_fail:
                raise

            # NOTE: a freshly created file is opened write-only here.
            if 't' in mode:
                fd = open(jpath, "wt")
            else:
                fd = open(jpath, "wb")

        # Text-mode handles are returned under the same annotation.
        return cast(IO[bytes], fd)

    def sub_storage(self, path: str) -> 'FSStorage':
        """Return a storage of the same class rooted at ``path``."""
        return self.__class__(self.j(path), self.existing)

    def sync(self) -> None:
        """Nothing to flush: every write goes straight to the file system."""
        pass

    def list(self, path: str) -> Iterator[Tuple[bool, str]]:
        """Yield ``(is_file, name)`` for every entry of the directory ``path``.

        Yields nothing when ``path`` does not exist. Directories are reported
        with ``is_file == False``, everything else with ``True``.

        Raises:
            OSError: if ``path`` exists but is not a directory (raised when
                iteration starts, since this is a generator).
        """
        path = self.j(path)

        if not os.path.exists(path):
            return

        if not os.path.isdir(path):
            raise OSError("{!r} is not a directory".format(path))

        for fobj in os.scandir(path):
            # Compare the bare entry name: ``fobj.path`` is the full path and
            # could never match the names kept in ``self.ignored``.
            if fobj.name not in self.ignored:
                if fobj.is_dir():
                    yield False, fobj.name
                else:
                    yield True, fobj.name
192
koder aka kdanilov22d134e2016-11-08 11:33:19 +0200193
class YAMLSerializer(ISerializer):
    """Serializer built on the full (non-safe) YAML dumper and loader.

    NOTE(security): ``unpack`` calls ``yaml.load`` with the full loader,
    which is not safe for untrusted input; use it on trusted data only.
    """

    def pack(self, value: IStorable) -> bytes:
        """Dump ``value`` to utf8-encoded YAML; ValueError if dumping fails."""
        try:
            packed = yaml.dump(value, Dumper=Dumper, encoding="utf8")
        except Exception as exc:
            message = "Can't pickle object {!r} to yaml".format(type(value))
            raise ValueError(message) from exc
        return packed

    def unpack(self, data: bytes) -> Any:
        """Parse YAML ``data`` with the full loader and return the result."""
        loaded = yaml.load(data, Loader=Loader)
        return loaded
koder aka kdanilov3d2bc4f2016-11-12 18:31:18 +0200204
205
class SAFEYAMLSerializer(ISerializer):
    """Serializer restricted to the safe YAML subset (safe_dump / safe_load)."""

    def pack(self, value: IStorable) -> bytes:
        """Dump ``value`` to utf8-encoded YAML; ValueError if dumping fails."""
        try:
            packed = yaml.safe_dump(value, encoding="utf8")
        except Exception as exc:
            message = "Can't pickle object {!r} to yaml".format(type(value))
            raise ValueError(message) from exc
        return packed

    def unpack(self, data: bytes) -> Any:
        """Parse YAML ``data`` with the safe loader and return the result."""
        loaded = yaml.safe_load(data)
        return loaded
216
217
218ObjClass = TypeVar('ObjClass', bound=IStorable)
219
220
koder aka kdanilovffaf48d2016-12-27 02:25:29 +0200221class _Raise:
222 pass
223
224
class Storage:
    """High-level storage: serializes objects and delegates byte I/O.

    Values are converted to bytes by ``serializer`` and stored through
    ``sstorage``. Methods taking ``*path`` join the parts with "/" before
    handing the result to the backend.
    """

    def __init__(self, sstorage: ISimpleStorage, serializer: ISerializer) -> None:
        self.sstorage = sstorage
        self.serializer = serializer

    def sub_storage(self, *path: str) -> 'Storage':
        """Return a storage of the same class rooted at ``path``, sharing the serializer."""
        fpath = "/".join(path)
        return self.__class__(self.sstorage.sub_storage(fpath), self.serializer)

    def put(self, value: Any, *path: str) -> None:
        """Serialize ``value`` and store it at ``path``.

        ``IStorable`` instances are converted with ``raw()`` first; any other
        value is passed to the serializer unchanged.
        """
        dct_value = cast(IStorable, value).raw() if isinstance(value, IStorable) else value
        serialized = self.serializer.pack(dct_value)  # type: ignore
        fpath = "/".join(path)
        self.sstorage.put(serialized, fpath)

    def put_list(self, value: Iterable[IStorable], *path: str) -> None:
        """Serialize the ``raw()`` form of every item as one list and store it at ``path``."""
        serialized = self.serializer.pack([obj.raw() for obj in value])  # type: ignore
        fpath = "/".join(path)
        self.sstorage.put(serialized, fpath)

    def get(self, path: str, default: Any = _Raise) -> Any:
        """Load and deserialize the value stored at ``path``.

        If reading from the backend fails and ``default`` was supplied, it is
        returned instead; without a default the backend error propagates.
        Deserialization errors are never masked by ``default``.
        """
        try:
            vl = self.sstorage.get(path)
        except Exception:
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must never be turned into a default value.
            if default is _Raise:
                raise
            return default

        return self.serializer.unpack(vl)

    def rm(self, *path: str) -> None:
        """Remove whatever is stored at ``path``."""
        fpath = "/".join(path)
        self.sstorage.rm(fpath)

    def __contains__(self, path: str) -> bool:
        """Tell whether ``path`` exists in the backend."""
        return path in self.sstorage

    def put_raw(self, val: bytes, *path: str) -> str:
        """Store ``val`` without serialization and return ``resolve_raw`` of its path."""
        fpath = "/".join(path)
        self.sstorage.put(val, fpath)
        # TODO: dirty hack
        return self.resolve_raw(fpath)

    def resolve_raw(self, fpath: str) -> str:
        """Return the backend's joined path for ``fpath``.

        Relies on the backend providing ``j()``, i.e. being an ``FSStorage``.
        """
        return cast(FSStorage, self.sstorage).j(fpath)

    def get_raw(self, *path: str) -> bytes:
        """Return the bytes stored at ``path`` without deserializing them."""
        return self.sstorage.get("/".join(path))

    def append_raw(self, value: bytes, *path: str) -> None:
        """Append ``value`` to the data at ``path``, opened in ``"rb+"`` mode."""
        with self.sstorage.get_fd("/".join(path), "rb+") as fd:
            fd.seek(0, os.SEEK_END)
            fd.write(value)

    def get_fd(self, path: str, mode: str = "r") -> IO:
        """Return the backend's file object for ``path`` opened with ``mode``."""
        return self.sstorage.get_fd(path, mode)

    def load_list(self, obj_class: Type[ObjClass], *path: str) -> List[ObjClass]:
        """Load a list stored at ``path`` and build an ``obj_class`` from each item via ``fromraw``."""
        path_s = "/".join(path)
        raw_val = cast(List[Dict[str, Any]], self.get(path_s))
        assert isinstance(raw_val, list)
        return [cast(ObjClass, obj_class.fromraw(val)) for val in raw_val]

    def load(self, obj_class: Type[ObjClass], *path: str) -> ObjClass:
        """Load the value stored at ``path`` and build an ``obj_class`` from it via ``fromraw``."""
        path_s = "/".join(path)
        return cast(ObjClass, obj_class.fromraw(self.get(path_s)))

    def sync(self) -> None:
        """Ask the backend to flush its state."""
        self.sstorage.sync()

    def __enter__(self) -> 'Storage':
        return self

    def __exit__(self, x: Any, y: Any, z: Any) -> None:
        # Sync on exit whether or not the block raised; exceptions are not suppressed.
        self.sync()

    def list(self, *path: str) -> Iterator[Tuple[bool, str]]:
        """Iterate over ``(is_file, name)`` pairs for the entries under ``path``."""
        return self.sstorage.list("/".join(path))

    def _iter_paths(self,
                    root: str,
                    path_parts: List[str],
                    groups: Dict[str, str]) -> Iterator[Tuple[bool, str, Dict[str, str]]]:
        """Walk the storage, matching one regex from ``path_parts`` per level.

        ``path_parts[0]`` is matched (anchored with a trailing ``$``) against
        every entry name under ``root``; matching entries are descended into
        with the remaining parts. Yields ``(is_file, path, groups)`` for
        entries matching the last part, where ``groups`` holds the named
        regex groups collected along the way. On a name clash the values
        passed in via ``groups`` take precedence over the current match.
        """
        curr = path_parts[0]
        rest = path_parts[1:]

        for is_file, name in self.list(root):
            # A file can't be descended into while pattern parts remain.
            if rest and is_file:
                continue

            rr = re.match(pattern=curr + "$", string=name)
            if rr:
                if root:
                    path = root + "/" + name
                else:
                    path = name

                new_groups = rr.groupdict().copy()
                new_groups.update(groups)

                if rest:
                    yield from self._iter_paths(path, rest, new_groups)
                else:
                    yield is_file, path, new_groups
332
koder aka kdanilov3d2bc4f2016-11-12 18:31:18 +0200333
def make_storage(url: str, existing: bool = False) -> Storage:
    """Build a Storage keeping safe-YAML-serialized data in files under ``url``."""
    backend = FSStorage(url, existing)
    serializer = SAFEYAMLSerializer()
    return Storage(backend, serializer)
koder aka kdanilov22d134e2016-11-08 11:33:19 +0200336