blob: 04593056bd90df5355fbf47ad8b37fab22dae36d [file] [log] [blame]
koder aka kdanilov22d134e2016-11-08 11:33:19 +02001"""
2This module contains interfaces for storage classes
3"""
4
koder aka kdanilov3d2bc4f2016-11-12 18:31:18 +02005import os
koder aka kdanilov108ac362017-01-19 20:17:16 +02006import re
koder aka kdanilov22d134e2016-11-08 11:33:19 +02007import abc
koder aka kdanilov23e6bdf2016-12-24 02:18:54 +02008import shutil
koder aka kdanilov7f59d562016-12-26 01:34:23 +02009import sqlite3
koder aka kdanilov108ac362017-01-19 20:17:16 +020010import logging
koder aka kdanilovffaf48d2016-12-27 02:25:29 +020011from typing import Any, TypeVar, Type, IO, Tuple, cast, List, Dict, Iterable, Iterator
koder aka kdanilov39e449e2016-12-17 15:15:26 +020012
13import yaml
14try:
15 from yaml import CLoader as Loader, CDumper as Dumper # type: ignore
16except ImportError:
17 from yaml import Loader, Dumper # type: ignore
koder aka kdanilova732a602017-02-01 20:29:56 +020018import numpy
koder aka kdanilov22d134e2016-11-08 11:33:19 +020019
koder aka kdanilov108ac362017-01-19 20:17:16 +020020from .common_types import IStorable
21
22
# Module-level logger registered under the "wally" name.
logger = logging.getLogger("wally")
koder aka kdanilov22d134e2016-11-08 11:33:19 +020024
25
class ISimpleStorage(metaclass=abc.ABCMeta):
    """Abstract byte-level storage.

    Implementations work with raw ``bytes`` addressed by string paths and
    perform no serialization themselves.
    """

    @abc.abstractmethod
    def put(self, value: bytes, path: str) -> None:
        """Store the bytes ``value`` under ``path``."""

    @abc.abstractmethod
    def get(self, path: str) -> bytes:
        """Return the bytes stored under ``path``."""

    @abc.abstractmethod
    def rm(self, path: str) -> None:
        """Remove whatever is stored under ``path``."""

    @abc.abstractmethod
    def sync(self) -> None:
        """Synchronize the storage; implementations may make this a no-op."""

    @abc.abstractmethod
    def __contains__(self, path: str) -> bool:
        """Tell whether ``path`` is present in the storage."""

    @abc.abstractmethod
    def get_fd(self, path: str, mode: str = "rb+") -> IO:
        """Open ``path`` as a file object using ``mode``."""

    @abc.abstractmethod
    def get_fname(self, path: str) -> str:
        """Return the file name which corresponds to ``path``."""

    @abc.abstractmethod
    def sub_storage(self, path: str) -> 'ISimpleStorage':
        """Return a storage whose root is ``path`` inside this one."""

    @abc.abstractmethod
    def list(self, path: str) -> Iterator[Tuple[bool, str]]:
        """Iterate over the entries below ``path`` as ``(flag, name)`` pairs.

        In the FSStorage implementation the flag is True for non-directory
        entries and False for directories.
        """
65
koder aka kdanilov22d134e2016-11-08 11:33:19 +020066
class ITSStorage(metaclass=abc.ABCMeta):
    """Abstract byte-level storage without serialization support.

    Declares the same operations as ISimpleStorage, except that there is no
    ``get_fname`` method.
    """

    @abc.abstractmethod
    def put(self, value: bytes, path: str) -> None:
        """Store the bytes ``value`` under ``path``."""

    @abc.abstractmethod
    def get(self, path: str) -> bytes:
        """Return the bytes stored under ``path``."""

    @abc.abstractmethod
    def rm(self, path: str) -> None:
        """Remove whatever is stored under ``path``."""

    @abc.abstractmethod
    def sync(self) -> None:
        """Synchronize the storage; implementations may make this a no-op."""

    @abc.abstractmethod
    def __contains__(self, path: str) -> bool:
        """Tell whether ``path`` is present in the storage."""

    @abc.abstractmethod
    def get_fd(self, path: str, mode: str = "rb+") -> IO:
        """Open ``path`` as a file object using ``mode``."""

    # NOTE(review): the return annotation names ISimpleStorage rather than
    # ITSStorage - confirm that this is intended.
    @abc.abstractmethod
    def sub_storage(self, path: str) -> 'ISimpleStorage':
        """Return a storage whose root is ``path`` inside this one."""

    @abc.abstractmethod
    def list(self, path: str) -> Iterator[Tuple[bool, str]]:
        """Iterate over the entries below ``path`` as ``(flag, name)`` pairs."""
102
103
class ISerializer(metaclass=abc.ABCMeta):
    """Abstract converter between values and their ``bytes`` representation."""

    @abc.abstractmethod
    def pack(self, value: IStorable) -> bytes:
        """Turn ``value`` into bytes."""

    @abc.abstractmethod
    def unpack(self, data: bytes) -> Any:
        """Rebuild a value from ``data``."""
113
114
class FSStorage(ISimpleStorage):
    """Simple storage which keeps every value in a file below ``root_path``.

    Storage paths are joined onto ``root_path`` with ``os.path.join``, so a
    path such as ``"a/b"`` ends up in the file ``<root_path>/a/b``.
    """

    def __init__(self, root_path: str, existing: bool) -> None:
        self.root_path = root_path
        # Kept and handed on to sub-storages; not otherwise read by this class.
        self.existing = existing
        # Entry names which list() never reports.
        self.ignored = {'.', '..'}

    def j(self, path: str) -> str:
        """Return ``path`` joined onto the storage root."""
        return os.path.join(self.root_path, path)

    @staticmethod
    def _ensure_parent_dir(jpath: str) -> None:
        """Create the directory which will contain ``jpath``, if there is one."""
        parent = os.path.dirname(jpath)
        # os.makedirs("") raises, so skip paths without a directory part.
        if parent:
            os.makedirs(parent, exist_ok=True)

    def put(self, value: bytes, path: str) -> None:
        """Write ``value`` to the file for ``path``, creating parent directories."""
        jpath = self.j(path)
        self._ensure_parent_dir(jpath)
        with open(jpath, "wb") as fd:
            fd.write(value)

    def get(self, path: str) -> bytes:
        """Return the content of the file for ``path``.

        Raises:
            KeyError: the file does not exist.
        """
        try:
            with open(self.j(path), "rb") as fd:
                return fd.read()
        except FileNotFoundError as exc:
            raise KeyError(path) from exc

    def rm(self, path: str) -> None:
        """Remove the file or directory tree stored under ``path``.

        A missing path is silently ignored.
        """
        # Resolve against the storage root like every other method does;
        # using the bare path would act relative to the process CWD.
        jpath = self.j(path)
        if os.path.isdir(jpath):
            shutil.rmtree(jpath, ignore_errors=True)
        elif os.path.exists(jpath):
            os.unlink(jpath)

    def __contains__(self, path: str) -> bool:
        """Tell whether a file or directory exists for ``path``."""
        return os.path.exists(self.j(path))

    def get_fname(self, path: str) -> str:
        """Return the file system name used for ``path``."""
        return self.j(path)

    def get_fd(self, path: str, mode: str = "rb+") -> IO[bytes]:
        """Open the file for ``path``.

        Besides the regular ``open()`` modes two pseudo-modes are understood:
        ``"cb"`` and ``"ct"`` open the file as ``"rb+"`` / ``"rt+"`` and
        create it (together with its parent directories) when it is missing.

        Raises:
            OSError: the file can't be opened and creation wasn't requested
                or didn't help.
        """
        jpath = self.j(path)

        if mode == "cb":
            create_on_fail = True
            mode = "rb+"
        elif mode == "ct":
            create_on_fail = True
            mode = "rt+"
        else:
            create_on_fail = False

        if create_on_fail:
            self._ensure_parent_dir(jpath)

        try:
            fd = open(jpath, mode)
        except FileNotFoundError:
            if not create_on_fail:
                raise
            # Create the file read/write, so that the handle offers the same
            # operations as the "r+" handle returned for an existing file.
            fd = open(jpath, "w+t" if 't' in mode else "w+b")

        # The text variants do not really yield IO[bytes]; the cast only
        # satisfies the declared return type.
        return cast(IO[bytes], fd)

    def sub_storage(self, path: str) -> 'FSStorage':
        """Return a storage of the same class rooted at ``path`` inside this one."""
        return self.__class__(self.j(path), self.existing)

    def sync(self) -> None:
        """Do nothing: every write goes straight to the file system."""
        pass

    def list(self, path: str) -> Iterator[Tuple[bool, str]]:
        """Yield ``(is_file, name)`` for every entry of the directory ``path``.

        ``is_file`` is False for directories and True for anything else.
        Nothing is yielded when ``path`` does not exist.  As this is a
        generator, all checks run on first iteration, not at call time.

        Raises:
            OSError: ``path`` exists but is not a directory.
        """
        path = self.j(path)

        if not os.path.exists(path):
            return

        if not os.path.isdir(path):
            raise OSError("{!r} is not a directory".format(path))

        for fobj in os.scandir(path):
            # Compare the bare entry name: DirEntry.path includes the
            # directory and so could never equal an ignored name.
            if fobj.name not in self.ignored:
                yield not fobj.is_dir(), fobj.name
199
koder aka kdanilov22d134e2016-11-08 11:33:19 +0200200
class YAMLSerializer(ISerializer):
    """YAML serializer built on the module-level ``Dumper`` / ``Loader``."""

    def pack(self, value: IStorable) -> bytes:
        """Dump ``value`` to utf8-encoded YAML.

        Raises:
            ValueError: the dump failed for any reason; the original
                exception is chained as the cause.
        """
        try:
            packed = yaml.dump(value, Dumper=Dumper, encoding="utf8")
        except Exception as err:
            message = "Can't pickle object {!r} to yaml".format(type(value))
            raise ValueError(message) from err
        return packed

    def unpack(self, data: bytes) -> Any:
        """Load the YAML document in ``data``."""
        # NOTE(review): Loader is the full (C)Loader imported at module top,
        # which can construct arbitrary Python objects - confirm that only
        # trusted data reaches this method.
        return yaml.load(data, Loader=Loader)
koder aka kdanilov3d2bc4f2016-11-12 18:31:18 +0200211
212
class SAFEYAMLSerializer(ISerializer):
    """YAML serializer restricted to ``yaml.safe_dump`` / ``yaml.safe_load``."""

    def pack(self, value: IStorable) -> bytes:
        """Dump ``value`` to utf8-encoded YAML with ``yaml.safe_dump``.

        Raises:
            ValueError: the dump failed for any reason; the original
                exception is chained as the cause.
        """
        try:
            packed = yaml.safe_dump(value, encoding="utf8")
        except Exception as err:
            message = "Can't pickle object {!r} to yaml".format(type(value))
            raise ValueError(message) from err
        return packed

    def unpack(self, data: bytes) -> Any:
        """Load the YAML document in ``data`` with ``yaml.safe_load``."""
        return yaml.safe_load(data)
223
224
# Type variable bound to IStorable; Storage.load and Storage.load_list use it
# to tie the type of the returned objects to the ``obj_class`` argument.
ObjClass = TypeVar('ObjClass', bound=IStorable)
226
227
class _Raise:
    """Marker used as the default of ``Storage.get``'s ``default`` argument.

    When ``default`` is this class itself, a failed lookup re-raises the
    error instead of returning a default value.
    """
230
231
class Storage:
    """Object storage: a serializer on top of a byte-level ISimpleStorage.

    Most methods take the path as separate components (``*path``), which are
    joined with ``"/"`` before being handed to the underlying storage.
    ``get``, ``get_fd``, ``get_fname`` and ``in`` take one ready-made path.
    """

    def __init__(self, sstorage: ISimpleStorage, serializer: ISerializer) -> None:
        self.sstorage = sstorage
        self.serializer = serializer

    def sub_storage(self, *path: str) -> 'Storage':
        """Return a Storage rooted at ``path`` which shares this serializer."""
        fpath = "/".join(path)
        return self.__class__(self.sstorage.sub_storage(fpath), self.serializer)

    def put(self, value: Any, *path: str) -> None:
        """Serialize ``value`` and store it under ``path``.

        IStorable instances are converted with their ``raw()`` method first;
        any other value is passed to the serializer unchanged.
        """
        dct_value = cast(IStorable, value).raw() if isinstance(value, IStorable) else value
        serialized = self.serializer.pack(dct_value)  # type: ignore
        fpath = "/".join(path)
        self.sstorage.put(serialized, fpath)

    def put_list(self, value: Iterable[IStorable], *path: str) -> None:
        """Serialize the ``raw()`` form of every item and store the list under ``path``."""
        serialized = self.serializer.pack([obj.raw() for obj in value])  # type: ignore
        fpath = "/".join(path)
        self.sstorage.put(serialized, fpath)

    def get(self, path: str, default: Any = _Raise) -> Any:
        """Return the deserialized value stored under ``path``.

        When reading from the underlying storage fails, ``default`` is
        returned if one was given; otherwise the error propagates.
        Deserialization errors always propagate.
        """
        try:
            vl = self.sstorage.get(path)
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt and SystemExit must
            # never be turned into a default value.
            if default is _Raise:
                raise
            return default

        return self.serializer.unpack(vl)

    def rm(self, *path: str) -> None:
        """Remove whatever is stored under ``path``."""
        fpath = "/".join(path)
        self.sstorage.rm(fpath)

    def __contains__(self, path: str) -> bool:
        """Tell whether ``path`` is present in the underlying storage."""
        return path in self.sstorage

    def put_raw(self, val: bytes, *path: str) -> str:
        """Store ``val`` without serialization; return the backend file name."""
        fpath = "/".join(path)
        self.sstorage.put(val, fpath)
        return self.resolve_raw(fpath)

    def resolve_raw(self, fpath: str) -> str:
        """Return the backend file name for the already joined ``fpath``."""
        # Go through the ISimpleStorage interface instead of casting the
        # backend to FSStorage; FSStorage.get_fname returns the same value
        # as FSStorage.j.
        return self.sstorage.get_fname(fpath)

    def get_raw(self, *path: str) -> bytes:
        """Return the bytes stored under ``path`` without deserialization."""
        return self.sstorage.get("/".join(path))

    def append_raw(self, value: bytes, *path: str) -> None:
        """Append ``value`` to the data stored under ``path``.

        The entry is opened with mode ``"rb+"``, i.e. without asking the
        backend to create it.
        """
        with self.sstorage.get_fd("/".join(path), "rb+") as fd:
            fd.seek(0, os.SEEK_END)
            fd.write(value)

    def get_fd(self, path: str, mode: str = "r") -> IO:
        """Open ``path`` through the underlying storage with ``mode``."""
        return self.sstorage.get_fd(path, mode)

    def get_fname(self, path: str) -> str:
        """Return the backend file name for ``path``."""
        return self.sstorage.get_fname(path)

    def load_list(self, obj_class: Type[ObjClass], *path: str) -> List[ObjClass]:
        """Load a stored list, rebuilding every item with ``obj_class.fromraw``."""
        path_s = "/".join(path)
        raw_val = cast(List[Dict[str, Any]], self.get(path_s))
        # Kept as an assert so callers see the same AssertionError as before;
        # note that the check is skipped when Python runs with -O.
        assert isinstance(raw_val, list)
        return [cast(ObjClass, obj_class.fromraw(val)) for val in raw_val]

    def load(self, obj_class: Type[ObjClass], *path: str) -> ObjClass:
        """Load the stored value and rebuild it with ``obj_class.fromraw``."""
        path_s = "/".join(path)
        return cast(ObjClass, obj_class.fromraw(self.get(path_s)))

    def sync(self) -> None:
        """Synchronize the underlying storage."""
        self.sstorage.sync()

    def __enter__(self) -> 'Storage':
        return self

    def __exit__(self, x: Any, y: Any, z: Any) -> None:
        # Exceptions are not suppressed; the storage is only synced on exit.
        self.sync()

    def list(self, *path: str) -> Iterator[Tuple[bool, str]]:
        """Iterate over the ``(is_file, name)`` entries below ``path``."""
        return self.sstorage.list("/".join(path))

    def _iter_paths(self,
                    root: str,
                    path_parts: List[str],
                    groups: Dict[str, str]) -> Iterator[Tuple[bool, str, Dict[str, str]]]:
        """Walk the storage, matching one regular expression per path level.

        ``path_parts`` must be non-empty.  Its first item is matched (from
        the start, with ``"$"`` appended) against every entry name directly
        below ``root``; the remaining items are applied recursively to the
        matching entries.

        Yields ``(is_file, path, groups)`` for every entry which matches the
        last pattern.  ``groups`` holds the named regex groups collected on
        the way down; values passed in via ``groups`` win over groups
        matched at the current level.
        """
        curr = path_parts[0]
        rest = path_parts[1:]

        for is_file, name in self.list(root):
            # Only a non-file entry can have further levels to descend into.
            if rest and is_file:
                continue

            rr = re.match(pattern=curr + "$", string=name)
            if not rr:
                continue

            path = root + "/" + name if root else name

            new_groups = rr.groupdict().copy()
            new_groups.update(groups)

            if rest:
                yield from self._iter_paths(path, rest, new_groups)
            else:
                yield is_file, path, new_groups
342
koder aka kdanilov3d2bc4f2016-11-12 18:31:18 +0200343
def make_storage(url: str, existing: bool = False) -> Storage:
    """Build a Storage which keeps safe-YAML serialized data in files under ``url``.

    ``url`` is used as the root path of an FSStorage; ``existing`` is passed
    to it unchanged.
    """
    backend = FSStorage(url, existing)
    serializer = SAFEYAMLSerializer()
    return Storage(backend, serializer)
koder aka kdanilov22d134e2016-11-08 11:33:19 +0200346