blob: aa90ac930387fecdc42d3e808aa8601e38e04f2a [file] [log] [blame]
"""
This module contains interfaces for storage classes,
together with their file-system and YAML based implementations
"""
4
koder aka kdanilov3d2bc4f2016-11-12 18:31:18 +02005import os
koder aka kdanilov108ac362017-01-19 20:17:16 +02006import re
koder aka kdanilov22d134e2016-11-08 11:33:19 +02007import abc
koder aka kdanilov23e6bdf2016-12-24 02:18:54 +02008import shutil
koder aka kdanilov7f59d562016-12-26 01:34:23 +02009import sqlite3
koder aka kdanilov108ac362017-01-19 20:17:16 +020010import logging
koder aka kdanilovffaf48d2016-12-27 02:25:29 +020011from typing import Any, TypeVar, Type, IO, Tuple, cast, List, Dict, Iterable, Iterator
koder aka kdanilov39e449e2016-12-17 15:15:26 +020012
13import yaml
14try:
15 from yaml import CLoader as Loader, CDumper as Dumper # type: ignore
16except ImportError:
17 from yaml import Loader, Dumper # type: ignore
koder aka kdanilova732a602017-02-01 20:29:56 +020018import numpy
koder aka kdanilov22d134e2016-11-08 11:33:19 +020019
koder aka kdanilov108ac362017-01-19 20:17:16 +020020from .common_types import IStorable
21
22
# Module-wide logger; it shares the "wally" logger name
logger = logging.getLogger(name="wally")
koder aka kdanilov22d134e2016-11-08 11:33:19 +020024
25
class ISimpleStorage(metaclass=abc.ABCMeta):
    """Abstract low-level storage.

    Implementations do no serialization: stored values are plain bytes,
    addressed by a storage-relative ``path`` string.
    """

    @abc.abstractmethod
    def put(self, value: bytes, path: str) -> None:
        """Store ``value`` under ``path``"""

    @abc.abstractmethod
    def get(self, path: str) -> bytes:
        """Return the bytes stored under ``path``"""

    @abc.abstractmethod
    def rm(self, path: str) -> None:
        """Remove whatever is stored under ``path``"""

    @abc.abstractmethod
    def sync(self) -> None:
        """Synchronize the storage (implementation defined)"""

    @abc.abstractmethod
    def __contains__(self, path: str) -> bool:
        """Tell whether ``path`` is present in the storage"""

    @abc.abstractmethod
    def get_fd(self, path: str, mode: str = "rb+") -> IO:
        """Return a file object for ``path`` opened with ``mode``"""

    @abc.abstractmethod
    def get_fname(self, path: str) -> str:
        """Return the file name which corresponds to ``path``"""

    @abc.abstractmethod
    def sub_storage(self, path: str) -> 'ISimpleStorage':
        """Return a storage for the ``path`` sub-tree"""

    @abc.abstractmethod
    def list(self, path: str) -> Iterator[Tuple[bool, str]]:
        """Iterate over ``(flag, name)`` pairs for the entries found at ``path``"""
65
koder aka kdanilov22d134e2016-11-08 11:33:19 +020066
class ITSStorage(metaclass=abc.ABCMeta):
    """Abstract low-level storage which operates on bytes only.

    Declares the same byte-oriented operations as ISimpleStorage,
    except that there is no ``get_fname`` here.
    """

    @abc.abstractmethod
    def put(self, value: bytes, path: str) -> None:
        """Store ``value`` under ``path``"""

    @abc.abstractmethod
    def get(self, path: str) -> bytes:
        """Return the bytes stored under ``path``"""

    @abc.abstractmethod
    def rm(self, path: str) -> None:
        """Remove whatever is stored under ``path``"""

    @abc.abstractmethod
    def sync(self) -> None:
        """Synchronize the storage (implementation defined)"""

    @abc.abstractmethod
    def __contains__(self, path: str) -> bool:
        """Tell whether ``path`` is present in the storage"""

    @abc.abstractmethod
    def get_fd(self, path: str, mode: str = "rb+") -> IO:
        """Return a file object for ``path`` opened with ``mode``"""

    # NOTE(review): the return annotation names ISimpleStorage, not ITSStorage -
    # confirm whether that is intended
    @abc.abstractmethod
    def sub_storage(self, path: str) -> 'ISimpleStorage':
        """Return a storage for the ``path`` sub-tree"""

    @abc.abstractmethod
    def list(self, path: str) -> Iterator[Tuple[bool, str]]:
        """Iterate over ``(flag, name)`` pairs for the entries found at ``path``"""
102
103
class ISerializer(metaclass=abc.ABCMeta):
    """Interface for serialization class"""

    @abc.abstractmethod
    def pack(self, value: IStorable) -> bytes:
        """Turn ``value`` into bytes"""

    @abc.abstractmethod
    def unpack(self, data: bytes) -> Any:
        """Turn bytes back into an object"""
113
114
class FSStorage(ISimpleStorage):
    """Store all data in files on FS.

    Every ``path`` argument is joined onto ``root_path`` with ``os.path.join``
    (see ``j``), so an empty path addresses the storage root itself.
    ``existing`` is only stored and handed over to sub-storages here.
    """

    def __init__(self, root_path: str, existing: bool) -> None:
        self.root_path = root_path
        self.existing = existing
        # entry names which list() never reports
        self.ignored = {'.', '..'}

    def j(self, path: str) -> str:
        """Return ``path`` joined onto the storage root"""
        return os.path.join(self.root_path, path)

    def put(self, value: bytes, path: str) -> None:
        """Write ``value`` into the file for ``path``, creating parent directories"""
        jpath = self.j(path)
        os.makedirs(os.path.dirname(jpath), exist_ok=True)
        with open(jpath, "wb") as fd:
            fd.write(value)

    def get(self, path: str) -> bytes:
        """Return the content of the file for ``path``.

        Raises KeyError(path) if the file does not exist.
        """
        try:
            with open(self.j(path), "rb") as fd:
                return fd.read()
        except FileNotFoundError as exc:
            raise KeyError(path) from exc

    def rm(self, path: str) -> None:
        """Remove the file or the whole directory tree stored under ``path``.

        Missing paths are ignored silently.
        """
        # resolve against the storage root - the raw path would be
        # interpreted relative to the current working directory
        jpath = self.j(path)
        if os.path.isdir(jpath):
            shutil.rmtree(jpath, ignore_errors=True)
        elif os.path.exists(jpath):
            os.unlink(jpath)

    def __contains__(self, path: str) -> bool:
        return os.path.exists(self.j(path))

    def get_fname(self, path: str) -> str:
        """Return the file system name which corresponds to ``path``"""
        return self.j(path)

    def get_fd(self, path: str, mode: str = "rb+") -> IO[bytes]:
        """Open the file for ``path`` and return the file object.

        Two pseudo modes are supported on top of the usual ``open`` modes:
        "cb" and "ct" open an existing file for reading and writing (binary
        and text respectively), and create the file together with its parent
        directories if it is missing. Any other mode goes to ``open`` as is.

        The result is always cast to IO[bytes], even for text modes.
        """
        jpath = self.j(path)

        # pseudo mode -> (mode for an existing file, mode used to create one)
        create_modes = {"cb": ("rb+", "wb+"), "ct": ("rt+", "wt+")}

        if mode not in create_modes:
            return cast(IO[bytes], open(jpath, mode))

        open_mode, create_mode = create_modes[mode]
        os.makedirs(os.path.dirname(jpath), exist_ok=True)

        try:
            fd = open(jpath, open_mode)
        except FileNotFoundError:
            # Create only when the file is really absent: falling back on any
            # other error would truncate a file which is already there.
            # "w+" keeps the new handle readable, like the requested "r+".
            fd = open(jpath, create_mode)

        return cast(IO[bytes], fd)

    def sub_storage(self, path: str) -> 'FSStorage':
        """Return a storage of the same class rooted at ``path``"""
        return self.__class__(self.j(path), self.existing)

    def sync(self) -> None:
        """Nothing to do - every operation works on the files directly"""
        pass

    def list(self, path: str) -> Iterator[Tuple[bool, str]]:
        """Iterate over the entries of the directory stored under ``path``.

        Yields ``(is_file, name)`` pairs: the flag is False for directories
        and True for everything else. Yields nothing if the path is missing.

        Raises OSError if the path exists, but is not a directory.
        """
        path = self.j(path)

        if not os.path.exists(path):
            return

        if not os.path.isdir(path):
            raise OSError("{!r} is not a directory".format(path))

        for fobj in os.scandir(path):
            # compare the bare entry name - fobj.path is the full path
            # and can never be equal to '.' or '..'
            if fobj.name not in self.ignored:
                yield not fobj.is_dir(), fobj.name
199
koder aka kdanilov22d134e2016-11-08 11:33:19 +0200200
class YAMLSerializer(ISerializer):
    """Serialize data to yaml with the full (not the safe) Dumper and Loader"""

    def pack(self, value: IStorable) -> bytes:
        """Dump ``value`` to utf8-encoded yaml.

        Raises ValueError if yaml fails to dump the value.
        """
        try:
            packed = yaml.dump(value, Dumper=Dumper, encoding="utf8")
        except Exception as exc:
            raise ValueError("Can't pickle object {!r} to yaml".format(type(value))) from exc
        return packed

    def unpack(self, data: bytes) -> Any:
        """Load an object from yaml ``data``"""
        # NOTE(review): yaml.load with the full Loader can construct arbitrary
        # python objects - use it on trusted data only
        return yaml.load(data, Loader=Loader)
koder aka kdanilov3d2bc4f2016-11-12 18:31:18 +0200211
212
class SAFEYAMLSerializer(ISerializer):
    """Serialize data to yaml using yaml.safe_dump and yaml.safe_load"""

    def pack(self, value: IStorable) -> bytes:
        """Dump ``value`` to utf8-encoded yaml.

        Raises ValueError if yaml fails to dump the value.
        """
        try:
            packed = yaml.safe_dump(value, encoding="utf8")
        except Exception as exc:
            raise ValueError("Can't pickle object {!r} to yaml".format(type(value))) from exc
        return packed

    def unpack(self, data: bytes) -> Any:
        """Load an object from yaml ``data``"""
        return yaml.safe_load(data)
223
224
# Type variable for the classes accepted by Storage.load and Storage.load_list
ObjClass = TypeVar("ObjClass", bound=IStorable)
226
227
class _Raise:
    """Marker class: the 'no default given' value of the ``default`` argument"""
230
231
class Storage:
    """Object storage built on top of a low-level storage and a serializer.

    Values are packed with ``serializer`` and kept as bytes in ``sstorage``.
    Multi-part paths are joined with "/". Objects built by ``load`` and
    ``load_list`` are cached per path; every write or removal made through
    this object drops the affected cache entries. Writes done through
    ``get_fd`` or through another Storage instance are not tracked.
    """

    def __init__(self, sstorage: ISimpleStorage, serializer: ISerializer) -> None:
        self.sstorage = sstorage
        self.serializer = serializer
        # joined path -> object(s) already built by load()/load_list()
        self.cache = {}  # type: Dict[str, Any]

    def _invalidate(self, fpath: str) -> None:
        """Drop cached objects stored at ``fpath`` or below it"""
        if not fpath:
            # empty path addresses the storage root
            self.cache.clear()
            return

        prefix = fpath + "/"
        stale = [key for key in self.cache if key == fpath or key.startswith(prefix)]
        for key in stale:
            del self.cache[key]

    def sub_storage(self, *path: str) -> 'Storage':
        """Return a Storage for the sub-tree at ``path`` with the same serializer"""
        fpath = "/".join(path)
        return self.__class__(self.sstorage.sub_storage(fpath), self.serializer)

    def put(self, value: Any, *path: str) -> None:
        """Serialize ``value`` and store it at ``path``.

        IStorable instances are converted with their ``raw()`` method first.
        """
        dct_value = cast(IStorable, value).raw() if isinstance(value, IStorable) else value
        serialized = self.serializer.pack(dct_value)  # type: ignore
        fpath = "/".join(path)
        self.sstorage.put(serialized, fpath)
        self._invalidate(fpath)

    def put_list(self, value: Iterable[IStorable], *path: str) -> None:
        """Serialize the list of ``raw()`` forms of ``value`` and store it at ``path``"""
        serialized = self.serializer.pack([obj.raw() for obj in value])  # type: ignore
        fpath = "/".join(path)
        self.sstorage.put(serialized, fpath)
        self._invalidate(fpath)

    def get(self, path: str, default: Any = _Raise) -> Any:
        """Return the deserialized value stored at ``path``.

        If reading from the low-level storage fails, ``default`` is returned
        when it is given, otherwise the error is re-raised.
        """
        try:
            vl = self.sstorage.get(path)
        except Exception:
            # not a bare except: KeyboardInterrupt and SystemExit
            # must never be replaced with the default value
            if default is _Raise:
                raise
            return default

        return self.serializer.unpack(vl)

    def rm(self, *path: str) -> None:
        """Remove everything stored at ``path``"""
        fpath = "/".join(path)
        self.sstorage.rm(fpath)
        self._invalidate(fpath)

    def __contains__(self, path: str) -> bool:
        return path in self.sstorage

    def put_raw(self, val: bytes, *path: str) -> str:
        """Store ``val`` at ``path`` as is and return the name of the resulting file"""
        fpath = "/".join(path)
        self.sstorage.put(val, fpath)
        self._invalidate(fpath)
        return self.resolve_raw(fpath)

    def resolve_raw(self, fpath: str) -> str:
        """Return the file name which corresponds to ``fpath``"""
        # get_fname is a part of ISimpleStorage, so no cast to FSStorage is needed
        return self.sstorage.get_fname(fpath)

    def get_raw(self, *path: str) -> bytes:
        """Return the bytes stored at ``path`` without deserialization"""
        return self.sstorage.get("/".join(path))

    def append_raw(self, value: bytes, *path: str) -> None:
        """Append ``value`` to the end of the data stored at ``path``.

        The data is opened with the "rb+" mode.
        """
        fpath = "/".join(path)
        with self.sstorage.get_fd(fpath, "rb+") as fd:
            fd.seek(0, os.SEEK_END)
            fd.write(value)
        self._invalidate(fpath)

    def get_fd(self, path: str, mode: str = "r") -> IO:
        """Return the file object which the low-level storage opens for ``path``"""
        return self.sstorage.get_fd(path, mode)

    def get_fname(self, path: str) -> str:
        """Return the file name which corresponds to ``path``"""
        return self.sstorage.get_fname(path)

    def load_list(self, obj_class: Type[ObjClass], *path: str) -> List[ObjClass]:
        """Return the list of ``obj_class`` objects stored at ``path``.

        Every item of the stored list goes through ``obj_class.fromraw``.
        The result is cached, so later calls return the same list object.
        """
        path_s = "/".join(path)
        if path_s not in self.cache:
            raw_val = cast(List[Dict[str, Any]], self.get(path_s))
            assert isinstance(raw_val, list)
            self.cache[path_s] = [cast(ObjClass, obj_class.fromraw(val)) for val in raw_val]
        return self.cache[path_s]

    def load(self, obj_class: Type[ObjClass], *path: str) -> ObjClass:
        """Return the ``obj_class`` object built by ``fromraw`` from the value at ``path``.

        The result is cached, so later calls return the same object.
        """
        path_s = "/".join(path)
        if path_s not in self.cache:
            self.cache[path_s] = cast(ObjClass, obj_class.fromraw(self.get(path_s)))
        return self.cache[path_s]

    def sync(self) -> None:
        """Synchronize the low-level storage"""
        self.sstorage.sync()

    def __enter__(self) -> 'Storage':
        return self

    def __exit__(self, x: Any, y: Any, z: Any) -> None:
        # returns None, so exceptions from the with-block are propagated
        self.sync()

    def list(self, *path: str) -> Iterator[Tuple[bool, str]]:
        """Iterate over ``(is_file, name)`` pairs for the entries stored at ``path``"""
        return self.sstorage.list("/".join(path))

    def _iter_paths(self,
                    root: str,
                    path_parts: List[str],
                    groups: Dict[str, str]) -> Iterator[Tuple[bool, str, Dict[str, str]]]:
        """Iterate over the stored paths which match a per-level list of regexps.

        ``path_parts`` must not be empty. Its first item is matched against
        the whole name of every entry in ``root``, and the remaining items are
        applied to the levels below. Plain files are skipped while deeper
        levels are still to be matched.

        Yields ``(is_file, path, groups)``, where ``groups`` holds the named
        regexp groups collected along the path. Values already present in the
        ``groups`` argument win over the ones matched on the current level.
        """

        curr = path_parts[0]
        rest = path_parts[1:]

        # the pattern is the same for every entry on this level
        name_rr = re.compile(curr + "$")

        for is_file, name in self.list(root):
            if rest and is_file:
                continue

            rr = name_rr.match(name)
            if rr:
                if root:
                    path = root + "/" + name
                else:
                    path = name

                new_groups = rr.groupdict().copy()
                new_groups.update(groups)

                if rest:
                    yield from self._iter_paths(path, rest, new_groups)
                else:
                    yield is_file, path, new_groups
347
koder aka kdanilov3d2bc4f2016-11-12 18:31:18 +0200348
def make_storage(url: str, existing: bool = False) -> Storage:
    """Create a Storage which keeps safe-yaml serialized data in files under ``url``"""
    backend = FSStorage(url, existing)
    serializer = SAFEYAMLSerializer()
    return Storage(backend, serializer)
koder aka kdanilov22d134e2016-11-08 11:33:19 +0200351