-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsamples.py
More file actions
151 lines (118 loc) · 4.71 KB
/
samples.py
File metadata and controls
151 lines (118 loc) · 4.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# Usage:
# python samples.py --server [server_id1,server_id2,...] --dataset dataset_id
# prefix server id or dataset id with ^ to trigger regex match.
import os
import datetime
import utilrsw
import hapiclient
from hapimeta import logger_kwargs, data_dir
from hapiplot import hapiplot
# Module-level logger configured with the project-wide settings from hapimeta.
log = utilrsw.logger(**logger_kwargs)
# Image formats for saved figures.
# NOTE(review): not referenced elsewhere in this file — presumably consumed by
# another module or leftover; confirm before removing.
savefig_fmts = ['svg', 'png']
# Output directory for availability products.
# NOTE(review): also not referenced in this file — confirm it is used elsewhere.
out_dir = os.path.join(data_dir, 'availability')
# Pickled "catalog-all" metadata for every server, produced by an upstream step.
catalogs_all_file = os.path.join(data_dir, 'catalogs-all.pkl')
def cli(argv=None):
    """Parse command-line options for samples.py.

    Parameters
    ----------
    argv : list of str or None
        Argument list to parse. ``None`` (the default) means argparse reads
        ``sys.argv[1:]``, so existing callers are unaffected; passing a list
        makes the parser usable (and testable) without touching sys.argv.

    Returns
    -------
    tuple (server, dataset)
        server  - list of server ids, or None if --server was not given.
        dataset - list of dataset ids, or None if --dataset was not given.
    """
    clkws = {
        "server": {
            "help": "server id or comma separated list of server ids."
        },
        "dataset": {
            # Raw string: in the original, "\." was an invalid escape sequence
            # (SyntaxWarning on modern Python). r"..." yields the identical
            # runtime text (a literal backslash-dot) without the warning.
            "help": r"dataset id or comma separated list of dataset ids. Escape commas in id with \. Prefix dataset id with ^ to use regex match."
        }
    }
    import io
    import csv
    import argparse
    parser = argparse.ArgumentParser()
    for k, v in clkws.items():
        parser.add_argument(f'--{k}', **v)
    # Note that hyphens are converted to underscores when parsing
    args = vars(parser.parse_args(argv))
    server = None
    if args['server'] is not None:
        server = args['server'].split(',')
    dataset = None
    if args['dataset'] is not None:
        # Use csv.reader so a backslash-escaped comma stays inside one id
        # instead of splitting it.
        csv_reader = csv.reader(io.StringIO(args['dataset']), escapechar='\\')
        dataset = next(csv_reader)
    return server, dataset
def process_server(catalog_all, server, datasets_only):
    """Fetch and plot a sample of data for each dataset on one HAPI server.

    Parameters
    ----------
    catalog_all : dict
        A "catalog-all" structure with ``about.url`` (server URL) and a
        ``catalog`` list of dataset dicts, each carrying ``id`` and ``info``.
    server : str
        Server id, used only in log messages.
    datasets_only : list of str or None
        If not None, only datasets whose id is in this list are processed.

    Side effects: logs progress/errors via the module-level ``log`` and calls
    hapiclient/hapiplot per parameter; returns None.
    """
    def extract_time(info, key):
        # Return (hapitime, hapitimeSample) for `key` ('startDate' or
        # 'stopDate'); (None, None) if the key is missing or blank.
        if key not in info:
            log.error(f" {server}/{dataset['id']}: key '{key}' is not in info")
            return None, None
        if info[key].strip() == "":
            log.error(f" {server}/{dataset['id']}: info[{key}].strip() = ''")
            return None, None
        hapitime = info[key]
        hapitimeSample = None
        if key == 'startDate' and 'sampleStartDate' in info:
            hapitimeSample = info['sampleStartDate']
        if key == 'stopDate' and 'sampleStopDate' in info:
            hapitimeSample = info['sampleStopDate']
        return hapitime, hapitimeSample
    log.info(f"server: {server} | {len(catalog_all['catalog'])} datasets")
    server_url = catalog_all['about']['url']
    for dataset in catalog_all['catalog']:
        if 'id' not in dataset:
            log.error(f" No 'id' in metadata: {dataset}. Skipping dataset.")
            continue
        if 'info' not in dataset:
            log.error(f" id={dataset['id']}: No 'info' key. Skipping.")
            continue
        if datasets_only is not None and dataset['id'] not in datasets_only:
            log.info(f" id={dataset['id']}: skipping dataset due to --dataset option")
            continue
        if 'parameters' not in dataset['info']:
            log.error(f" id={dataset['id']}: No 'parameters' key. Skipping dataset.")
            continue
        startDate, sampleStartDate = extract_time(dataset['info'], 'startDate')
        stopDate, sampleStopDate = extract_time(dataset['info'], 'stopDate')
        parameters = dataset['info']['parameters']
        log.info("")
        log.info(f" id={dataset['id']}")
        log.info(f" {len(parameters)} parameters")
        log.info(f" startDate = {startDate}")
        log.info(f" stopDate = {stopDate}")
        log.info(f" sampleStartDate = {sampleStartDate}")
        log.info(f" sampleStopDate = {sampleStopDate}")
        log.info(" parameters:")
        if sampleStartDate is not None and sampleStopDate is not None:
            # Prefer the server-recommended sample interval when available.
            startDate = sampleStartDate
            stopDate = sampleStopDate
        else:
            # Robustness fix: extract_time may have returned None; the
            # original would then crash inside hapitime2datetime.
            if startDate is None:
                log.error(f" id={dataset['id']}: no usable startDate. Skipping dataset.")
                continue
            # No sample interval given; request one day of data starting at
            # startDate. BUG FIX: the original called datetime.strptime on a
            # datetime object ('datetime' is the module here -> AttributeError,
            # and strptime parses strings, it does not format). strftime
            # converts the computed stop time back into a HAPI time string.
            start_dt = hapiclient.hapitime2datetime(startDate, allow_missing_Z=True)[0]
            stopDate = (start_dt + datetime.timedelta(days=1)).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
        for i, parameter in enumerate(parameters):
            log.info(f" {i}. {parameter['name']}")
            try:
                data, meta = hapiclient.hapi(server_url, dataset['id'], parameter['name'], startDate, stopDate, logging=True)
            except Exception as e:
                log.error(f" {server} {dataset['id']}: Error getting data: {e}")
                continue
            try:
                # BUG FIX: removed leftover debugger stop
                # (`import pdb; pdb.set_trace()`), which would halt every run.
                data, meta = hapiplot(data, meta, returnimage=True)
            except Exception as e:
                log.error(f" {server} {dataset['id']}: Error plotting data: {e}")
                continue
# --- Script entry: decide which servers/datasets to sample, then process. ---
servers_only, datasets_only = cli()
catalogs_all = utilrsw.read(catalogs_all_file)

if servers_only is None:
    log.info(f"Generating sample plots for all servers in {catalogs_all_file}")
else:
    log.info(f"Generating sample plots for servers: {servers_only}")

if datasets_only is None:
    log.info("Generating sample plots for all datasets.")
else:
    log.info(f"Generating sample plots for datasets: {datasets_only}")

# Keep every server id unless a --server restriction was given.
servers = [sid for sid in catalogs_all if servers_only is None or sid in servers_only]

for sid in servers:
    process_server(catalogs_all[sid], sid, datasets_only)