Blame - wally/hlstorage.py - mcp/cvp-wally

2017-01-19 20:17:16 +0200

[diff] [blame]

1

import os

koder aka kdanilov

2017-01-19 20:17:16 +0200

[diff] [blame]

2

import logging

kdanylov aka koder

2017-03-27 22:19:09 +0300

[diff] [blame]

3

from typing import cast, Iterator, Tuple, Type, Dict, Optional, Any, List

koder aka kdanilov

2017-01-19 20:17:16 +0200

[diff] [blame]

import numpy

koder aka kdanilov

2017-02-01 20:29:56 +0200

[diff] [blame]

7

from .suits.job import JobConfig

8

from .result_classes import SuiteConfig, TimeSeries, DataSource, StatProps, IResultStorage

kdanylov aka koder

2017-03-27 22:19:09 +0300

[diff] [blame]

9

from .storage import Storage

10

from .utils import StopTestError

koder aka kdanilov

2017-01-19 20:17:16 +0200

[diff] [blame]

11

from .suits.all_suits import all_suits

12

13

14

logger = logging.getLogger('wally')

class DB_re:
    """Regex fragments for the variable components of storage paths.

    Every attribute whose name does not start with ``__`` is wrapped into a
    named group of the same name when ``DB_rr`` is built, so only pattern
    strings belong in this class.
    """
    # four dot-separated digit groups, a colon and one more digit group;
    # the dots are escaped so only a literal '.' is accepted between groups
    node_id = r'\d+\.\d+\.\d+\.\d+:\d+'
    job_id = r'[-a-zA-Z0-9_]+_\d+'
    suite_id = r'[a-z_]+_\d+'
    sensor = r'[-a-z_]+'
    dev = r'[-a-zA-Z0-9_]+'
    tag = r'[a-z_.]+'
    metric = r'[a-z_.]+'

koder aka kdanilov

2017-01-19 20:17:16 +0200

[diff] [blame]

25

26

27

class DB_paths:
    """Path templates for everything kept in the result storage.

    Attributes ending in ``_r`` are regex templates: literal dots are written
    as ``\\.`` and ``{name}`` placeholders are later substituted (for example
    with the named groups from ``DB_rr``). The attribute of the same name
    without the suffix is the plain ``str.format`` template, obtained by
    turning every ``\\.`` back into ``.``.
    """
    suite_cfg_r = r'results/{suite_id}\.info\.yml'

    # Escaped variant of the per-job directory. The dot has to be literal,
    # otherwise '.' matches any character when the template is used as regex.
    job_root_r = r'results/{suite_id}\.{job_id}/'
    # plain (non-regex) per-job directory; value is 'results/{suite_id}.{job_id}/'
    job_root = job_root_r.replace("\\.", '.')
    job_cfg_r = job_root_r + r'info\.yml'

    # time series, data from load tool, sensor is a tool name
    # (dot before {tag} is escaped: metric and tag patterns both allow dots,
    # so an unescaped '.' lets the greedy metric group swallow part of the tag)
    ts_r = job_root_r + r'{node_id}\.{sensor}\.{metric}\.{tag}'

    # statistical data for ts
    stat_r = job_root_r + r'{node_id}\.{sensor}\.{metric}\.stat\.yaml'

    # sensor data
    sensor_data_r = r'sensors/{node_id}_{sensor}\.{dev}\.{metric}\.csv'
    sensor_time_r = r'sensors/{node_id}_collected_at\.csv'

    report_root = 'report/'
    plot_r = r'{suite_id}\.{job_id}/{node_id}\.{sensor}\.{dev}\.{metric}\.{tag}'

    # plain format templates derived from the regex templates above
    job_cfg = job_cfg_r.replace("\\.", '.')
    suite_cfg = suite_cfg_r.replace("\\.", '.')
    ts = ts_r.replace("\\.", '.')
    stat = stat_r.replace("\\.", '.')
    sensor_data = sensor_data_r.replace("\\.", '.')
    sensor_time = sensor_time_r.replace("\\.", '.')
    plot = plot_r.replace("\\.", '.')

53

koder aka kdanilov

2017-02-01 20:29:56 +0200

[diff] [blame]

54

55

# Named-group version of every pattern declared on DB_re:
# attribute "x" with pattern P becomes "(?P<x>P)". Dunder entries of the
# class namespace are skipped.
DB_rr = dict(
    (attr, r"(?P<{}>{})".format(attr, pattern))
    for attr, pattern in DB_re.__dict__.items()
    if not attr.startswith("__")
)

koder aka kdanilov

2017-01-19 20:17:16 +0200

[diff] [blame]

58

59

koder aka kdanilov

2017-02-01 20:29:56 +0200

[diff] [blame]

60

def fill_path(path: str, **params) -> str:
    """Substitute ``{name}`` placeholders in *path* with the given values.

    Parameters whose value is ``None`` are ignored, so their placeholders stay
    in the returned string untouched; placeholders without a matching keyword
    are left as they are too. Every occurrence of a placeholder is replaced.
    """
    provided = ((key, value) for key, value in params.items() if value is not None)
    for key, value in provided:
        placeholder = "{" + key + "}"
        path = path.replace(placeholder, value)
    return path

koder aka kdanilov

2017-01-19 20:17:16 +0200

[diff] [blame]

65

66

67

class ResultStorage(IResultStorage):

68

# TODO: check that all path components match required patterns

69

koder aka kdanilov

2017-02-01 20:29:56 +0200

[diff] [blame]

70

ts_header_size = 64

koder aka kdanilov

2017-01-19 20:17:16 +0200

[diff] [blame]

71

ts_header_format = "!IIIcc"

koder aka kdanilov

2017-02-01 20:29:56 +0200

[diff] [blame]

72

ts_arr_tag = 'csv'

kdanylov aka koder

2017-03-27 22:19:09 +0300

[diff] [blame]

73

csv_file_encoding = 'ascii'

koder aka kdanilov

2017-01-19 20:17:16 +0200

[diff] [blame]

74

75

def __init__(self, storage: Storage) -> None:
    """Remember the backing storage and start with an empty array cache."""
    # path -> (file size, file timestamp in ns, parsed array, header),
    # filled and consulted by load_array
    self.cache = {}  # type: Dict[str, Tuple[int, int, Any, List[str]]]
    self.storage = storage

koder aka kdanilov

2017-01-19 20:17:16 +0200

[diff] [blame]

78

79

def sync(self) -> None:
    """Forward the call to ``sync()`` of the underlying storage."""
    backend = self.storage
    backend.sync()

81

koder aka kdanilov

2017-02-01 20:29:56 +0200

[diff] [blame]

82

# ----------------- SERIALIZATION / DESERIALIZATION -------------------------------------------------------------

kdanylov aka koder

2017-03-27 22:19:09 +0300

[diff] [blame]

83

def load_array(self, path: str, skip_shape: bool = False) -> Tuple[numpy.ndarray, List[str]]:
    """Load a CSV-stored numeric array together with its header.

    The first line of the file is a comma-separated header; the remaining
    lines are comma-separated numbers, one row per line.

    :param path: storage path of the CSV file.
    :param skip_shape: drop the first header field before interpreting the
        header (``load_ts`` uses this for files written by ``put_ts``).
    :return: ``(array, header)`` where the dtype of ``array`` is taken from
        the first (remaining) header field and ``header`` holds the fields
        after it. A single-column file yields a 1-D array, otherwise the
        array is shaped ``(lines, columns)``.

    Results are cached per path and reused while the file size and its
    modification time are unchanged. The cache key is the path only, so the
    same path must not be loaded with different ``skip_shape`` values.
    """
    with self.storage.get_fd(path, "rb") as fd:
        stats = os.fstat(fd.fileno())
        cached = self.cache.get(path)
        if cached is not None:
            size, mtime, obj, header = cached
            # mtime, not atime: writing to a file does not update its access
            # time, so atime cannot detect a same-size rewrite
            if size == stats.st_size and mtime == stats.st_mtime_ns:
                return obj, header

        header = fd.readline().decode(self.csv_file_encoding).strip().split(",")
        if skip_shape:
            header = header[1:]

        dt = fd.read().decode("utf-8").strip()
        # flatten all rows into one comma separated list for the text parser
        arr = numpy.fromstring(dt.replace("\n", ','), sep=',', dtype=header[0])
        if len(dt) != 0:
            lines = dt.count("\n") + 1
            columns = dt.split("\n", 1)[0].count(",") + 1
            assert lines * columns == len(arr)
            if columns == 1:
                arr.shape = (lines,)
            else:
                arr.shape = (lines, columns)

    self.cache[path] = (stats.st_size, stats.st_mtime_ns, arr, header[1:])
    return arr, header[1:]

109

110

def put_array(self, path: str, data: numpy.ndarray, header: List[str], append_on_exists: bool = False) -> None:
    """Write *data* to *path* as CSV, preceded by a one-line header.

    The stored header is ``data.dtype.name`` followed by the *header* fields.

    :param path: storage path of the CSV file.
    :param data: 1-D or 2-D array; a 1-D array is written one value per line.
    :param header: extra header fields stored after the dtype name.
    :param append_on_exists: when true and *path* already exists in the
        storage, rows are appended to the existing file after checking that
        its header equals the expected one; otherwise the file is
        (re)written from scratch.
    :raises AssertionError: the existing file's header differs.
    """
    header = [data.dtype.name] + header

    exists = append_on_exists and path in self.storage
    if len(data.shape) == 1:
        # make array vertical to simplify reading
        vw = data.view().reshape((data.shape[0], 1))
    else:
        vw = data

    # "%lu" drops the fractional part of floating point values, while the
    # header still announces a float dtype; use a round-trippable float
    # format for such arrays and keep the integer format for everything else
    fmt = "%.17g" if data.dtype.kind == "f" else "%lu"

    with self.storage.get_fd(path, "cb" if exists else "wb") as fd:
        if exists:
            curr_header = fd.readline().decode(self.csv_file_encoding).rstrip().split(",")
            assert header == curr_header, \
                "Path {!r}. Expected header ({!r}) and current header ({!r}) don't match"\
                .format(path, header, curr_header)
            fd.seek(0, os.SEEK_END)
        else:
            fd.write((",".join(header) + "\n").encode(self.csv_file_encoding))

        numpy.savetxt(fd, vw, delimiter=',', newline="\n", fmt=fmt)

koder aka kdanilov

2017-02-01 20:29:56 +0200

[diff] [blame]

131

132

def load_ts(self, ds: DataSource, path: str) -> TimeSeries:
    """Load the array stored at *path* and wrap it into a ``TimeSeries``.

    The first column of the loaded array becomes ``times``, the remaining
    columns become ``data`` (flattened to 1-D when there is only one). The
    two header fields left after loading are used as ``units`` and
    ``time_units``. The series is named ``"<ds.dev>.<ds.sensor>"``.
    """
    table, ts_header = self.load_array(path, skip_shape=True)
    units, time_units = ts_header

    values = table[:, 1:]
    if values.shape[1] == 1:
        # single data column: expose it as a flat vector
        values = values.reshape((-1,))

    series_name = "{}.{}".format(ds.dev, ds.sensor)
    return TimeSeries(series_name,
                      raw=None,
                      data=values,
                      times=table[:, 0],
                      source=ds,
                      units=units,
                      time_units=time_units)

147

148

def load_sensor(self, ds: DataSource) -> TimeSeries:
    """Load one sensor metric and its collection timestamps as a ``TimeSeries``.

    Two arrays are read: the ``sensor_time`` file (timestamps) and the
    ``sensor_data`` file (values), both located from the fields of *ds*.
    The headers of both files are asserted to match *ds*, and both arrays
    are asserted to be 1-D. The value units are taken from the data header;
    the time units are fixed to ``'us'``.
    """
    ds_fields = ds.__dict__

    time_path = DB_paths.sensor_time.format(**ds_fields)
    collected_at, collect_header = self.load_array(time_path)
    assert collect_header == [ds.node_id, 'collected_at', 'us'], repr(collect_header)

    data_path = DB_paths.sensor_data.format(**ds_fields)
    data, data_header = self.load_array(data_path)

    # third header field carries the units of the stored values
    data_units = data_header[2]
    assert data_header == [ds.node_id, ds.metric_fqdn, data_units]

    assert len(data.shape) == 1
    assert len(collected_at.shape) == 1

    return TimeSeries(ds.metric_fqdn,
                      raw=None,
                      data=data,
                      times=collected_at,
                      source=ds,
                      units=data_units,
                      time_units='us')

# ------------- CHECK DATA IN STORAGE ----------------------------------------------------------------------------

168

169

def check_plot_file(self, source: DataSource) -> Optional[str]:
    """Return the report-relative plot path for *source* if the file exists.

    The path is built from the ``plot`` template; existence is checked on the
    location the storage resolves for that path under the report root.
    Returns ``None`` when no such file exists.
    """
    rel_path = DB_paths.plot.format(**source.__dict__)
    resolved = self.storage.resolve_raw(DB_paths.report_root + rel_path)
    if os.path.exists(resolved):
        return rel_path
    return None

173

174

# ------------- PUT DATA INTO STORAGE --------------------------------------------------------------------------

175

176

def put_or_check_suite(self, suite: SuiteConfig) -> None:
    """Store the suite config, refusing to replace a different stored one.

    If something is already stored under this suite's config path and it
    compares unequal to *suite*, the mismatch is logged and
    ``StopTestError`` is raised. Otherwise *suite* is written to that path.
    """
    path = DB_paths.suite_cfg.format(suite_id=suite.storage_id)
    if path in self.storage:
        db_cfg = self.storage.get(path)
        if db_cfg != suite:
            logger.error("Current suite %s config is not equal to found in storage at %s", suite.test_type, path)
            raise StopTestError()

    # reached when nothing was stored yet or the stored config equals `suite`
    self.storage.put(suite, path)

185

koder aka kdanilov

2017-02-01 20:29:56 +0200

[diff] [blame]

186

def put_job(self, suite: SuiteConfig, job: JobConfig) -> None:
    """Store *job* under the job-config path of the given suite and job ids."""
    target = DB_paths.job_cfg.format(job_id=job.storage_id,
                                     suite_id=suite.storage_id)
    self.storage.put(job, target)

189

190

def put_ts(self, ts: TimeSeries) -> None:
    """Store a time series as a CSV array, plus its raw payload if present.

    ``ts.times`` is written as the first column and ``ts.data`` as the
    following column(s). The series must use one unsigned integer dtype for
    both data and times and its source tag must equal ``ts_arr_tag``; these
    preconditions are asserted. When ``ts.raw`` is truthy it is additionally
    stored as-is under the same path template with ``ts.raw_tag`` as tag.
    """
    assert ts.data.dtype == ts.times.dtype
    assert ts.data.dtype.kind == 'u'
    assert ts.source.tag == self.ts_arr_tag

    csv_path = DB_paths.ts.format(**ts.source.__dict__)
    header = [ts.data.dtype.name, ts.units, ts.time_units]

    # times as a column vector, data as a matrix with one row per timestamp
    time_col = ts.times.view().reshape((-1, 1))
    if len(ts.data.shape) == 1:
        data_cols = ts.data.view().reshape((ts.times.shape[0], -1))
    else:
        data_cols = ts.data

    table = numpy.concatenate((time_col, data_cols), axis=1)
    self.put_array(csv_path, table, header)

    if ts.raw:
        raw_source = ts.source(tag=ts.raw_tag)
        raw_path = DB_paths.ts.format(**raw_source.__dict__)
        self.storage.put_raw(ts.raw, raw_path)

210

211

def put_extra(self, data: bytes, source: DataSource) -> None:
    """Store *data* at the ``ts`` path built from the fields of *source*."""
    target = DB_paths.ts.format(**source.__dict__)
    self.storage.put(data, target)

koder aka kdanilov

2017-01-19 20:17:16 +0200

[diff] [blame]

213

214

def put_stat(self, data: StatProps, source: DataSource) -> None:
    """Store statistics *data* at the ``stat`` path built from *source*."""
    target = DB_paths.stat.format(**source.__dict__)
    self.storage.put(data, target)

koder aka kdanilov

2017-01-19 20:17:16 +0200

[diff] [blame]

216

217

# return path to file to be inserted into report

218

def put_plot_file(self, data: bytes, source: DataSource) -> str:
    """Store plot bytes under the report root and return the relative path.

    The returned path is relative to the report root (it does not include
    ``DB_paths.report_root``).
    """
    rel_path = DB_paths.plot.format(**source.__dict__)
    full_path = DB_paths.report_root + rel_path
    self.storage.put_raw(data, full_path)
    return rel_path

koder aka kdanilov

2017-01-19 20:17:16 +0200

[diff] [blame]

222

koder aka kdanilov

2017-01-19 20:17:16 +0200

[diff] [blame]

223

def put_report(self, report: str, name: str) -> str:
    """Store the text of a report under the report root.

    :param report: report text.
    :param name: file name relative to ``DB_paths.report_root``.
    :return: whatever the storage's ``put_raw`` returns for the stored file.

    The text is encoded as UTF-8 rather than with the ASCII codec used for
    CSV files: pure-ASCII reports produce the same bytes as before, while
    reports with non-ASCII characters no longer fail with
    ``UnicodeEncodeError``.
    """
    return self.storage.put_raw(report.encode("utf-8"), DB_paths.report_root + name)

koder aka kdanilov

2017-02-01 20:29:56 +0200

[diff] [blame]

225

226

def append_sensor(self, data: numpy.array, ds: DataSource, units: str) -> None:
    """Append sensor samples (or their collection times) to the sensor CSV.

    When ``ds.metric`` is ``'collected_at'`` the data goes to the
    ``sensor_time`` file, otherwise to the ``sensor_data`` file of the metric.
    The file is created on first use and appended to afterwards.
    """
    is_time_column = ds.metric == 'collected_at'
    template = DB_paths.sensor_time if is_time_column else DB_paths.sensor_data
    metrics_fqn = 'collected_at' if is_time_column else ds.metric_fqdn

    target = template.format(**ds.__dict__)
    self.put_array(target, data, [ds.node_id, metrics_fqn, units], append_on_exists=True)

koder aka kdanilov

2017-02-01 20:29:56 +0200

[diff] [blame]

234

235

# ------------- GET DATA FROM STORAGE --------------------------------------------------------------------------

236

237

def get_stat(self, stat_cls: Type[StatProps], source: DataSource) -> StatProps:
    """Load the statistics stored for *source* as an instance of *stat_cls*."""
    stat_path = DB_paths.stat.format(**source.__dict__)
    return self.storage.load(stat_cls, stat_path)

239

240

# ------------- ITER OVER STORAGE ------------------------------------------------------------------------------

241

242

def iter_paths(self, path_glob) -> Iterator[Tuple[bool, str, Dict[str, str]]]:
    """Yield storage entries matching a path template.

    Placeholders left in *path_glob* are replaced with the named regex groups
    from ``DB_rr``; the result is split on ``/`` and handed to the storage's
    ``_iter_paths``, whose items are yielded unchanged.
    """
    components = path_glob.format(**DB_rr).split("/")
    yield from self.storage._iter_paths("", components, {})

245

246

def iter_suite(self, suite_type: str = None) -> Iterator[SuiteConfig]:
    """Yield the stored suite configs, optionally only those of one test type.

    :param suite_type: when truthy, only suites whose ``test_type`` equals it
        are yielded; otherwise every stored suite is yielded.
    """
    for is_file, suite_info_path, groups in self.iter_paths(DB_paths.suite_cfg_r):
        assert is_file
        suite = self.storage.load(SuiteConfig, suite_info_path)
        # the id inside the config must agree with the one in the file name
        assert suite.storage_id == groups['suite_id']
        if suite_type and suite.test_type != suite_type:
            continue
        yield suite

254

255

def iter_job(self, suite: SuiteConfig) -> Iterator[JobConfig]:
    """Yield the job configs stored for *suite*.

    Each config is loaded with the job config class registered in
    ``all_suits`` for the suite's test type.
    """
    pattern = fill_path(DB_paths.job_cfg_r, suite_id=suite.storage_id)
    config_cls = all_suits[suite.test_type].job_config_cls
    for is_file, cfg_path, groups in self.iter_paths(pattern):
        assert is_file
        loaded = self.storage.load(config_cls, cfg_path)
        job = cast(JobConfig, loaded)
        # the id inside the config must agree with the one in the path
        assert job.storage_id == groups['job_id']
        yield job

263

264

# iterate over test tool data

265

def iter_ts(self, suite: SuiteConfig, job: JobConfig, **filters) -> Iterator[TimeSeries]:

266

filters.update(suite_id=suite.storage_id, job_id=job.storage_id)

267

ts_glob = fill_path(DB_paths.ts_r, **filters)

koder aka kdanilov