koder aka kdanilov | 22d134e | 2016-11-08 11:33:19 +0200 | [diff] [blame] | 1 | """ |
| 2 | This module contains interfaces for storage classes |
| 3 | """ |
| 4 | |
koder aka kdanilov | 3d2bc4f | 2016-11-12 18:31:18 +0200 | [diff] [blame] | 5 | import os |
koder aka kdanilov | 108ac36 | 2017-01-19 20:17:16 +0200 | [diff] [blame] | 6 | import re |
koder aka kdanilov | 22d134e | 2016-11-08 11:33:19 +0200 | [diff] [blame] | 7 | import abc |
koder aka kdanilov | 23e6bdf | 2016-12-24 02:18:54 +0200 | [diff] [blame] | 8 | import shutil |
koder aka kdanilov | 7f59d56 | 2016-12-26 01:34:23 +0200 | [diff] [blame] | 9 | import sqlite3 |
koder aka kdanilov | 108ac36 | 2017-01-19 20:17:16 +0200 | [diff] [blame] | 10 | import logging |
koder aka kdanilov | ffaf48d | 2016-12-27 02:25:29 +0200 | [diff] [blame] | 11 | from typing import Any, TypeVar, Type, IO, Tuple, cast, List, Dict, Iterable, Iterator |
koder aka kdanilov | 39e449e | 2016-12-17 15:15:26 +0200 | [diff] [blame] | 12 | |
| 13 | import yaml |
| 14 | try: |
| 15 | from yaml import CLoader as Loader, CDumper as Dumper # type: ignore |
| 16 | except ImportError: |
| 17 | from yaml import Loader, Dumper # type: ignore |
koder aka kdanilov | a732a60 | 2017-02-01 20:29:56 +0200 | [diff] [blame^] | 18 | import numpy |
koder aka kdanilov | 22d134e | 2016-11-08 11:33:19 +0200 | [diff] [blame] | 19 | |
koder aka kdanilov | 108ac36 | 2017-01-19 20:17:16 +0200 | [diff] [blame] | 20 | from .common_types import IStorable |
koder aka kdanilov | a732a60 | 2017-02-01 20:29:56 +0200 | [diff] [blame^] | 21 | from .utils import shape2str, str2shape |
koder aka kdanilov | 108ac36 | 2017-01-19 20:17:16 +0200 | [diff] [blame] | 22 | |
| 23 | |
# Module-level logger; obtained under the fixed name "wally".
logger = logging.getLogger("wally")
koder aka kdanilov | 22d134e | 2016-11-08 11:33:19 +0200 | [diff] [blame] | 25 | |
| 26 | |
class ISimpleStorage(metaclass=abc.ABCMeta):
    """Low-level storage interface.

    Implementations deal with raw bytes only - serialization is handled
    by a separate layer. Entries are addressed by string paths.
    """

    @abc.abstractmethod
    def put(self, value: bytes, path: str) -> None:
        """Store ``value`` under ``path``."""

    @abc.abstractmethod
    def get(self, path: str) -> bytes:
        """Return the bytes stored under ``path``."""

    @abc.abstractmethod
    def rm(self, path: str) -> None:
        """Remove whatever is stored under ``path``."""

    @abc.abstractmethod
    def sync(self) -> None:
        """Flush pending changes to the backing store."""

    @abc.abstractmethod
    def __contains__(self, path: str) -> bool:
        """Tell whether something is stored under ``path``."""

    @abc.abstractmethod
    def get_fd(self, path: str, mode: str = "rb+") -> IO:
        """Return a file-like object for ``path`` opened with ``mode``."""

    @abc.abstractmethod
    def sub_storage(self, path: str) -> 'ISimpleStorage':
        """Return a storage of the same kind rooted at ``path``."""

    @abc.abstractmethod
    def list(self, path: str) -> Iterator[Tuple[bool, str]]:
        """Iterate over the ``(is_file, name)`` pairs found under ``path``."""
| 62 | |
koder aka kdanilov | 22d134e | 2016-11-08 11:33:19 +0200 | [diff] [blame] | 63 | |
class ISerializer(metaclass=abc.ABCMeta):
    """Contract for classes converting values to bytes and back."""

    @abc.abstractmethod
    def pack(self, value: IStorable) -> bytes:
        """Serialize ``value`` into bytes."""

    @abc.abstractmethod
    def unpack(self, data: bytes) -> Any:
        """Rebuild a value from serialized ``data``."""
| 73 | |
| 74 | |
class DBStorage(ISimpleStorage):
    """Bytes storage kept in a single sqlite3 table (``wally_storage``).

    Every entry is a ``(key, data, type)`` row. An instance may be bound to
    a key prefix: all paths passed to its methods are then prepended with
    that prefix. Sub-storages share the connection of their parent.

    Changes made by put()/rm() are committed only by sync().
    """

    create_tb_sql = "CREATE TABLE IF NOT EXISTS wally_storage (key text, data blob, type text)"
    insert_sql = "INSERT INTO wally_storage VALUES (?, ?, ?)"
    update_sql = "UPDATE wally_storage SET data=?, type=? WHERE key=?"
    select_sql = "SELECT data, type FROM wally_storage WHERE key=?"
    contains_sql = "SELECT 1 FROM wally_storage WHERE key=?"
    # Parameterized prefix delete; the bound value must be escaped with '\'
    # so that '%' and '_' inside a path are matched literally.
    rm_sql = "DELETE FROM wally_storage WHERE key LIKE ? ESCAPE '\\'"
    list2_sql = "SELECT key, length(data), type FROM wally_storage"
    # Minimal acceptable value of sqlite3.threadsafety
    SQLITE3_THREADSAFE = 1

    def __init__(self, db_path: str = None, existing: bool = False,
                 prefix: str = None, db: sqlite3.Connection = None) -> None:
        """Open (or attach to) the database.

        Parameters:
            db_path: path to the sqlite file; when given, a new connection
                is opened and the table is created if it is missing.
            existing: with ``db_path`` - require the file to exist already.
            prefix: key prefix of this storage; a trailing '/' is added
                when absent.
            db: already opened connection, used when ``db_path`` is not set.

        Raises:
            IOError: ``existing`` is set but there is no file at ``db_path``,
                or the database can't be opened.
            RuntimeError: sqlite3 was built without thread support.
            ValueError: neither ``db_path`` nor ``db`` was passed.
        """
        # Set unconditionally so that sub-storages have the attribute too.
        self.existing = existing

        if db_path:
            if existing and not os.path.isfile(db_path):
                raise IOError("No storage found at {!r}".format(db_path))

            # dirname is empty for a bare file name - makedirs('') would fail
            db_dir = os.path.dirname(db_path)
            if db_dir:
                os.makedirs(db_dir, exist_ok=True)

            # Since python 3.11 sqlite3.threadsafety reports the real build
            # mode (0, 1 or 3), so anything >= 1 has to be accepted; an
            # equality test against 1 rejects fully serialized builds.
            if sqlite3.threadsafety < self.SQLITE3_THREADSAFE:
                raise RuntimeError("Sqlite3 compiled without threadsafe support, can't use DB storage on it")

            try:
                self.db = sqlite3.connect(db_path, check_same_thread=False)
            except sqlite3.OperationalError as exc:
                raise IOError("Can't open database at {!r}".format(db_path)) from exc

            self.db.execute(self.create_tb_sql)
        else:
            if db is None:
                raise ValueError("Either db or db_path parameter must be passed")
            self.db = db

        if prefix is None:
            self.prefix = ""
        elif not prefix.endswith('/'):
            self.prefix = prefix + '/'
        else:
            self.prefix = prefix

    @staticmethod
    def _like_prefix(path: str) -> str:
        """Build a LIKE pattern matching keys which start with ``path`` literally."""
        escaped = path.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        return escaped + "%"

    def put(self, value: bytes, path: str) -> None:
        """Insert ``value`` under ``path`` or overwrite the existing row."""
        c = self.db.cursor()
        fpath = self.prefix + path
        c.execute(self.contains_sql, (fpath,))
        if c.fetchone() is None:
            c.execute(self.insert_sql, (fpath, value, 'yaml'))
        else:
            c.execute(self.update_sql, (value, 'yaml', fpath))

    def get(self, path: str) -> bytes:
        """Return data stored under ``path``; raise KeyError when there is none."""
        c = self.db.cursor()
        c.execute(self.select_sql, (self.prefix + path,))
        res = cast(List[Tuple[bytes, str]], c.fetchall())  # type: List[Tuple[bytes, str]]
        if not res:
            raise KeyError(path)
        assert len(res) == 1
        val, tp = res[0]
        assert tp == 'yaml'
        return val

    def rm(self, path: str) -> None:
        """Delete every key which starts with ``path`` (relative to the prefix)."""
        c = self.db.cursor()
        c.execute(self.rm_sql, (self._like_prefix(self.prefix + path),))

    def __contains__(self, path: str) -> bool:
        """Tell whether a row with key ``path`` (relative to the prefix) exists."""
        c = self.db.cursor()
        # the prefix must be applied exactly once
        c.execute(self.contains_sql, (self.prefix + path,))
        return c.fetchone() is not None

    def print_tree(self):
        """Print key, data length and type of every row, sorted, to stdout."""
        c = self.db.cursor()
        c.execute(self.list2_sql)
        rows = sorted(c.fetchall())
        print("------------------ DB ---------------------")
        for key, data_ln, tp in rows:
            print(key, data_ln, tp)
        print("------------------ END --------------------")

    def sub_storage(self, path: str) -> 'DBStorage':
        """Return a storage on the same connection with the prefix extended by ``path``."""
        return self.__class__(prefix=self.prefix + path, db=self.db, existing=self.existing)

    def sync(self):
        """Commit the current transaction."""
        self.db.commit()

    def get_fd(self, path: str, mode: str = "rb+") -> IO[bytes]:
        """Not supported by this backend."""
        raise NotImplementedError("SQLITE3 doesn't provide fd-like interface")

    def list(self, path: str) -> Iterator[Tuple[bool, str]]:
        """Not supported by this backend."""
        raise NotImplementedError("SQLITE3 doesn't provide list method")
| 172 | |
koder aka kdanilov | 7f59d56 | 2016-12-26 01:34:23 +0200 | [diff] [blame] | 173 | |
# File name of the sqlite database, relative to the storage root directory.
DB_REL_PATH = "__db__.db"
| 175 | |
| 176 | |
class FSStorage(ISimpleStorage):
    """Store all data in files on FS.

    Every path passed to the methods is interpreted relative to
    ``root_path``.
    """

    def __init__(self, root_path: str, existing: bool) -> None:
        """Remember the root directory; nothing is created on disk here."""
        self.root_path = root_path
        self.existing = existing
        # full paths which list() must not report
        self.ignored = {self.j(DB_REL_PATH), '.', '..'}

    def j(self, path: str) -> str:
        """Join ``path`` to the storage root."""
        return os.path.join(self.root_path, path)

    @staticmethod
    def _ensure_parent_dir(jpath: str) -> None:
        """Create the directory which holds ``jpath`` if it has one."""
        parent = os.path.dirname(jpath)
        # dirname is empty for a bare file name - makedirs('') would fail
        if parent:
            os.makedirs(parent, exist_ok=True)

    def put(self, value: bytes, path: str) -> None:
        """Write ``value`` to the file at ``path``, creating parent directories."""
        jpath = self.j(path)
        self._ensure_parent_dir(jpath)
        with open(jpath, "wb") as fd:
            fd.write(value)

    def get(self, path: str) -> bytes:
        """Return the content of the file at ``path``; raise KeyError if it is missing."""
        try:
            with open(self.j(path), "rb") as fd:
                return fd.read()
        except FileNotFoundError as exc:
            raise KeyError(path) from exc

    def rm(self, path: str) -> None:
        """Remove the file or the whole directory tree at ``path``.

        A missing path is silently ignored.
        """
        # Resolve against the storage root - the raw relative path would be
        # looked up in the current working directory instead.
        jpath = self.j(path)
        if os.path.isdir(jpath):
            shutil.rmtree(jpath, ignore_errors=True)
        elif os.path.exists(jpath):
            os.unlink(jpath)

    def __contains__(self, path: str) -> bool:
        """Tell whether ``path`` exists under the storage root."""
        return os.path.exists(self.j(path))

    def get_fd(self, path: str, mode: str = "rb+") -> IO[bytes]:
        """Open the file at ``path``.

        Besides regular open() modes two pseudo-modes are accepted:
        "cb" / "ct" - open for binary / text update ("rb+" / "rt+") and
        create the file ("wb" / "wt") when it doesn't exist yet.
        Parent directories are created for the pseudo-modes and for every
        mode which may create the file ('w', 'a', 'x').
        """
        jpath = self.j(path)

        # pseudo-mode -> (mode tried first, mode used to create the file)
        create_modes = {"cb": ("rb+", "wb"), "ct": ("rt+", "wt")}
        if mode in create_modes:
            open_mode, create_mode = create_modes[mode]
        else:
            open_mode, create_mode = mode, None

        if create_mode is not None or any(flag in open_mode for flag in "wax"):
            self._ensure_parent_dir(jpath)

        try:
            fd = open(jpath, open_mode)
        except FileNotFoundError:
            if create_mode is None:
                raise
            fd = open(jpath, create_mode)

        return cast(IO[bytes], fd)

    def sub_storage(self, path: str) -> 'FSStorage':
        """Return a storage rooted at ``path`` inside this one."""
        return self.__class__(self.j(path), self.existing)

    def sync(self):
        """Nothing to do - this class keeps no buffered state of its own."""
        pass

    def list(self, path: str) -> Iterator[Tuple[bool, str]]:
        """Yield ``(is_file, name)`` for every entry of directory ``path``.

        Yields nothing if ``path`` doesn't exist; raises OSError if it is
        not a directory. Entries listed in ``self.ignored`` are skipped.
        """
        path = self.j(path)

        if not os.path.exists(path):
            return

        if not os.path.isdir(path):
            raise OSError("{!r} is not a directory".format(path))

        for fobj in os.scandir(path):
            if fobj.path not in self.ignored:
                yield not fobj.is_dir(), fobj.name
| 258 | |
koder aka kdanilov | 22d134e | 2016-11-08 11:33:19 +0200 | [diff] [blame] | 259 | |
class YAMLSerializer(ISerializer):
    """Serializer built on the full yaml Dumper/Loader pair."""

    def pack(self, value: IStorable) -> bytes:
        """Dump ``value`` to utf8-encoded yaml; any failure becomes ValueError."""
        try:
            packed = yaml.dump(value, Dumper=Dumper, encoding="utf8")
        except Exception as exc:
            message = "Can't pickle object {!r} to yaml".format(type(value))
            raise ValueError(message) from exc
        return packed

    def unpack(self, data: bytes) -> Any:
        """Load a value back from yaml ``data``."""
        # NOTE(review): the full Loader can construct arbitrary python
        # objects - feed it trusted data only (SAFEYAMLSerializer otherwise).
        loaded = yaml.load(data, Loader=Loader)
        return loaded
koder aka kdanilov | 3d2bc4f | 2016-11-12 18:31:18 +0200 | [diff] [blame] | 270 | |
| 271 | |
class SAFEYAMLSerializer(ISerializer):
    """Serializer built on yaml.safe_dump / yaml.safe_load."""

    def pack(self, value: IStorable) -> bytes:
        """Dump ``value`` to utf8-encoded yaml; any failure becomes ValueError."""
        try:
            packed = yaml.safe_dump(value, encoding="utf8")
        except Exception as exc:
            message = "Can't pickle object {!r} to yaml".format(type(value))
            raise ValueError(message) from exc
        return packed

    def unpack(self, data: bytes) -> Any:
        """Load a value back from yaml ``data``."""
        loaded = yaml.safe_load(data)
        return loaded
| 282 | |
| 283 | |
# Type variable for IStorable subclasses; used by Storage.load / Storage.load_list
# to tie the returned object type to the class passed in.
ObjClass = TypeVar('ObjClass', bound=IStorable)
| 285 | |
| 286 | |
koder aka kdanilov | ffaf48d | 2016-12-27 02:25:29 +0200 | [diff] [blame] | 287 | class _Raise: |
| 288 | pass |
| 289 | |
| 290 | |
# Encoding used to write and read the header line of array (csv) files.
csv_file_encoding = 'ascii'
| 292 | |
| 293 | |
class Storage:
    """High-level storage on top of two low-level backends.

    Serialized objects are written to both ``db`` and ``fs`` and read back
    from ``db``; raw blobs, file descriptors, arrays and listings go to
    ``fs`` only. Multi-part paths are joined with '/'.

    Can be used as a context manager - sync() is called on exit.
    """

    def __init__(self, fs_storage: ISimpleStorage, db_storage: ISimpleStorage, serializer: ISerializer) -> None:
        self.fs = fs_storage
        self.db = db_storage
        self.serializer = serializer

    def sub_storage(self, *path: str) -> 'Storage':
        """Return a Storage whose backends are both rooted at ``path``."""
        fpath = "/".join(path)
        return self.__class__(self.fs.sub_storage(fpath), self.db.sub_storage(fpath), self.serializer)

    def put(self, value: Any, *path: str) -> None:
        """Serialize ``value`` (its raw() form for IStorable) and store it in both backends."""
        dct_value = cast(IStorable, value).raw() if isinstance(value, IStorable) else value
        serialized = self.serializer.pack(dct_value)  # type: ignore
        fpath = "/".join(path)
        self.db.put(serialized, fpath)
        self.fs.put(serialized, fpath)

    def put_list(self, value: Iterable[IStorable], *path: str) -> None:
        """Serialize the raw() forms of all objects as one list and store it in both backends."""
        serialized = self.serializer.pack([obj.raw() for obj in value])  # type: ignore
        fpath = "/".join(path)
        self.db.put(serialized, fpath)
        self.fs.put(serialized, fpath)

    def get(self, path: str, default: Any = _Raise) -> Any:
        """Read and deserialize the value stored under ``path`` in the db backend.

        When the backend lookup fails, ``default`` is returned if given,
        otherwise the backend's exception propagates.
        """
        try:
            vl = self.db.get(path)
        except Exception:
            # A bare except would also swallow KeyboardInterrupt/SystemExit
            # whenever a default is supplied.
            if default is _Raise:
                raise
            return default

        return self.serializer.unpack(vl)

    def rm(self, *path: str) -> None:
        """Remove ``path`` from both backends."""
        fpath = "/".join(path)
        self.fs.rm(fpath)
        self.db.rm(fpath)

    def __contains__(self, path: str) -> bool:
        """Tell whether ``path`` is present in at least one backend."""
        return path in self.fs or path in self.db

    def put_raw(self, val: bytes, *path: str) -> str:
        """Store ``val`` as is in the fs backend and return the resolved path of the file."""
        fpath = "/".join(path)
        self.fs.put(val, fpath)
        # TODO: dirty hack
        return self.resolve_raw(fpath)

    def resolve_raw(self, fpath) -> str:
        """Return ``fpath`` joined to the fs backend root (the backend is treated as FSStorage)."""
        return cast(FSStorage, self.fs).j(fpath)

    def get_raw(self, *path: str) -> bytes:
        """Return the bytes stored under ``path`` in the fs backend."""
        return self.fs.get("/".join(path))

    def append_raw(self, value: bytes, *path: str) -> None:
        """Append ``value`` to the fs entry at ``path``, which is opened with "rb+"."""
        with self.fs.get_fd("/".join(path), "rb+") as fd:
            fd.seek(0, os.SEEK_END)
            fd.write(value)

    def get_fd(self, path: str, mode: str = "r") -> IO:
        """Return a file object for ``path`` from the fs backend."""
        return self.fs.get_fd(path, mode)

    @staticmethod
    def _check_header(header: List[str]) -> None:
        """Assert that every header field is a string without commas."""
        for val in header:
            assert isinstance(val, str) and ',' not in val, \
                "Can't convert {!r} to array header, as it's values contains comma".format(header)

    def put_array(self, header: List[str], value: numpy.ndarray, *path: str) -> None:
        """Write ``value`` as csv to ``path``, replacing previous content.

        The first line of the file is ``header`` followed by the dtype name
        and the size of the second axis (0 for 1-D arrays).
        """
        self._check_header(header)
        fpath = "/".join(path)
        with self.get_fd(fpath, "wb") as fd:
            self.do_append(fd, header, value, fpath)

    def get_array(self, *path: str) -> Tuple[List[str], numpy.ndarray]:
        """Read an array written by put_array()/append().

        Returns the user part of the header and the array, reshaped to the
        rank recorded in the header.
        """
        path_s = "/".join(path)
        with self.get_fd(path_s, "rb") as fd:
            header = fd.readline().decode(csv_file_encoding).rstrip().split(",")
            type_code, second_axis = header[-2:]
            res = numpy.genfromtxt(fd, dtype=type_code, delimiter=',')

        # genfromtxt squeezes the result: a single value comes back 0-d and
        # a single row/column comes back 1-d, so restore the recorded rank.
        if '0' == second_axis:
            res = res.reshape((-1,))
        else:
            res = res.reshape((-1, int(second_axis)))

        return header[:-2], res

    def append(self, header: List[str], value: numpy.ndarray, *path: str) -> None:
        """Append ``value`` to the csv file at ``path``, creating it when missing.

        For an existing non-empty file the stored header must match.
        """
        self._check_header(header)
        fpath = "/".join(path)
        with self.get_fd(fpath, "cb") as fd:
            self.do_append(fd, header, value, fpath, maybe_append=True)

    def do_append(self, fd, header: List[str], value: numpy.ndarray, path: str, fmt="%lu",
                  maybe_append: bool = False) -> None:
        """Write ``value`` (and the header line when needed) to ``fd``.

        Parameters:
            fd: binary file object to write to.
            header: user header fields; dtype name and second axis size
                are appended to them for the file header.
            value: 1-D or 2-D array.
            path: used in the error message only.
            fmt: savetxt format of a single value.
            maybe_append: ``fd`` may already hold data - verify its header
                and continue after the last line instead of writing a new one.
        """
        second_axis = 0 if len(value.shape) == 1 else value.shape[1]

        # Build a new list: extending ``header`` in place would change the
        # caller's list and break the header check of the next append.
        full_header = list(header) + [value.dtype.name, str(second_axis)]

        write_header = True
        if maybe_append:
            fd.seek(0, os.SEEK_END)
            if fd.tell() != 0:
                fd.seek(0, os.SEEK_SET)
                # check header match
                curr_header = fd.readline().decode(csv_file_encoding).rstrip().split(",")
                assert full_header == curr_header, \
                    "Path {!r}. Expected header ({!r}) and current header ({!r}) don't match"\
                    .format(path, full_header, curr_header)
                fd.seek(0, os.SEEK_END)
                write_header = False

        if write_header:
            fd.write((",".join(full_header) + "\n").encode(csv_file_encoding))

        if len(value.shape) == 1:
            # make array vertical to simplify reading
            vw = value.view().reshape((value.shape[0], 1))
        else:
            vw = value
        numpy.savetxt(fd, vw, delimiter=',', newline="\n", fmt=fmt)

    def load_list(self, obj_class: Type[ObjClass], *path: str) -> List[ObjClass]:
        """Load a list stored under ``path`` and build ``obj_class`` objects via fromraw()."""
        path_s = "/".join(path)
        raw_val = cast(List[Dict[str, Any]], self.get(path_s))
        assert isinstance(raw_val, list)
        return [cast(ObjClass, obj_class.fromraw(val)) for val in raw_val]

    def load(self, obj_class: Type[ObjClass], *path: str) -> ObjClass:
        """Load the value stored under ``path`` and build an ``obj_class`` object via fromraw()."""
        path_s = "/".join(path)
        return cast(ObjClass, obj_class.fromraw(self.get(path_s)))

    def sync(self) -> None:
        """Sync both backends, db first."""
        self.db.sync()
        self.fs.sync()

    def __enter__(self) -> 'Storage':
        return self

    def __exit__(self, x: Any, y: Any, z: Any) -> None:
        # sync regardless of whether the block raised
        self.sync()

    def list(self, *path: str) -> Iterator[Tuple[bool, str]]:
        """List ``path`` in the fs backend as ``(is_file, name)`` pairs."""
        return self.fs.list("/".join(path))

    def _iter_paths(self,
                    root: str,
                    path_parts: List[str],
                    groups: Dict[str, str]) -> Iterator[Tuple[bool, str, Dict[str, str]]]:
        """Walk the fs backend matching one regex per path level.

        ``path_parts[0]`` is matched (anchored at both ends) against the
        names found in ``root``; remaining parts are matched recursively
        inside matching directories. Yields ``(is_file, path, groups)``
        for entries matching the last part, where ``groups`` merges the
        named groups of every level; values passed in ``groups`` win over
        newly matched ones.
        """
        curr = path_parts[0]
        rest = path_parts[1:]

        for is_file, name in self.list(root):
            # a file can't have children, so it can't match a non-final part
            if rest and is_file:
                continue

            rr = re.match(pattern=curr + "$", string=name)
            if not rr:
                continue

            path = root + "/" + name if root else name

            new_groups = rr.groupdict().copy()
            new_groups.update(groups)

            if rest:
                yield from self._iter_paths(path, rest, new_groups)
            else:
                yield is_file, path, new_groups
| 472 | |
koder aka kdanilov | 3d2bc4f | 2016-11-12 18:31:18 +0200 | [diff] [blame] | 473 | |
def make_storage(url: str, existing: bool = False) -> Storage:
    """Build a Storage rooted at directory ``url``.

    Files go to an FSStorage at ``url``, the database lives in the
    DB_REL_PATH file inside it, values are serialized with safe yaml.
    """
    fs_backend = FSStorage(url, existing)
    # NOTE(review): ``existing`` is not forwarded to DBStorage, so a missing
    # database file is created even when existing=True - confirm intent.
    db_backend = DBStorage(os.path.join(url, DB_REL_PATH))
    return Storage(fs_backend, db_backend, SAFEYAMLSerializer())
koder aka kdanilov | 22d134e | 2016-11-08 11:33:19 +0200 | [diff] [blame] | 478 | |