Build a catalog for CMIP6 CMORized output#
Import packages#
from ecgtools import Builder
from ecgtools.parsers import parse_cmip6
Instantiate a Builder object#
b = Builder(root_path="/glade/collections/cmip/CMIP6/CFMIP/", depth=3, njobs=-1)
Build catalog and inspect built catalog#
Here we use the CMIP6 parser!
b.build(parse_cmip6)
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 36 concurrent workers.
[Parallel(n_jobs=-1)]: Done 3 out of 24 | elapsed: 1.2s remaining: 8.2s
[Parallel(n_jobs=-1)]: Done 8 out of 24 | elapsed: 1.2s remaining: 2.5s
[Parallel(n_jobs=-1)]: Done 13 out of 24 | elapsed: 1.3s remaining: 1.1s
[Parallel(n_jobs=-1)]: Done 18 out of 24 | elapsed: 1.5s remaining: 0.5s
[Parallel(n_jobs=-1)]: Done 24 out of 24 | elapsed: 2.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 36 concurrent workers.
[Parallel(n_jobs=-1)]: Done 90 tasks | elapsed: 1.6s
[Parallel(n_jobs=-1)]: Done 216 tasks | elapsed: 2.1s
[Parallel(n_jobs=-1)]: Done 504 tasks | elapsed: 3.1s
[Parallel(n_jobs=-1)]: Done 1296 tasks | elapsed: 6.4s
[Parallel(n_jobs=-1)]: Done 2232 tasks | elapsed: 10.4s
[Parallel(n_jobs=-1)]: Done 3312 tasks | elapsed: 14.5s
[Parallel(n_jobs=-1)]: Done 4536 tasks | elapsed: 19.2s
[Parallel(n_jobs=-1)]: Done 5904 tasks | elapsed: 24.4s
[Parallel(n_jobs=-1)]: Done 7416 tasks | elapsed: 30.5s
[Parallel(n_jobs=-1)]: Done 9072 tasks | elapsed: 36.6s
[Parallel(n_jobs=-1)]: Done 10872 tasks | elapsed: 42.5s
[Parallel(n_jobs=-1)]: Done 12816 tasks | elapsed: 49.2s
[Parallel(n_jobs=-1)]: Done 14904 tasks | elapsed: 56.7s
[Parallel(n_jobs=-1)]: Done 17136 tasks | elapsed: 1.1min
[Parallel(n_jobs=-1)]: Done 17912 out of 17912 | elapsed: 1.1min finished
/glade/work/mgrover/git_repos/ecgtools/ecgtools/builder.py:180: UserWarning: Unable to parse 8 assets/files. A list of these assets can be found in `.invalid_assets` attribute.
parsing_func, parsing_func_kwargs
Builder(root_path=PosixPath('/glade/collections/cmip/CMIP6/CFMIP'), extension='.nc', depth=3, exclude_patterns=None, njobs=-1)
b.df.head()
activity_id | branch_method | branch_time_in_child | branch_time_in_parent | experiment | experiment_id | frequency | grid | grid_label | institution_id | ... | standard_name | long_name | units | vertical_levels | init_year | start_time | end_time | time_range | path | version | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | CFMIP | no parent | 0.0 | 0.0 | An AGCM experiment with monthly-varying SSTs a... | piSST-4xCO2 | mon | native 0.9x1.25 finite volume grid (192x288 la... | gn | NCAR | ... | relative_humidity | Relative Humidity | % | 32.0 | NaN | 0001-01-15 12:00:00 | 0030-12-15 12:00:00 | 0001-01-15 12:00:00-0030-12-15 12:00:00 | /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... | v0 |
1 | CFMIP | no parent | 0.0 | 0.0 | An AGCM experiment with monthly-varying SSTs a... | piSST-4xCO2 | mon | native 0.9x1.25 finite volume grid (192x288 la... | gn | NCAR | ... | relative_humidity | Relative Humidity | % | 32.0 | NaN | 0001-01-15 12:00:00 | 0030-12-15 12:00:00 | 0001-01-15 12:00:00-0030-12-15 12:00:00 | /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... | v20200209 |
2 | CFMIP | no parent | 0.0 | 0.0 | An AGCM experiment with monthly-varying SSTs a... | piSST-4xCO2 | mon | native 0.9x1.25 finite volume grid (192x288 la... | gn | NCAR | ... | tendency_of_air_temperature_due_to_advection | Tendency of Air Temperature Due to Advection | K s-1 | 32.0 | NaN | 0001-01-15 12:00:00 | 0030-12-15 12:00:00 | 0001-01-15 12:00:00-0030-12-15 12:00:00 | /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... | v0 |
3 | CFMIP | no parent | 0.0 | 0.0 | An AGCM experiment with monthly-varying SSTs a... | piSST-4xCO2 | mon | native 0.9x1.25 finite volume grid (192x288 la... | gn | NCAR | ... | tendency_of_air_temperature_due_to_advection | Tendency of Air Temperature Due to Advection | K s-1 | 32.0 | NaN | 0001-01-15 12:00:00 | 0030-12-15 12:00:00 | 0001-01-15 12:00:00-0030-12-15 12:00:00 | /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... | v20200209 |
4 | CFMIP | no parent | 0.0 | 0.0 | An AGCM experiment with monthly-varying SSTs a... | piSST-4xCO2 | mon | native 0.9x1.25 finite volume grid (192x288 la... | gn | NCAR | ... | cloud_area_fraction | CALIPSO Total Cloud Cover Percentage | % | 1.0 | NaN | 0001-01-15 12:00:00 | 0030-12-15 12:00:00 | 0001-01-15 12:00:00-0030-12-15 12:00:00 | /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... | v0 |
5 rows × 36 columns
b.invalid_assets
INVALID_ASSET | TRACEBACK | |
---|---|---|
6372 | /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... | Traceback (most recent call last):\n File "/g... |
6373 | /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... | Traceback (most recent call last):\n File "/g... |
6374 | /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... | Traceback (most recent call last):\n File "/g... |
6583 | /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... | Traceback (most recent call last):\n File "/g... |
14578 | /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... | Traceback (most recent call last):\n File "/g... |
14579 | /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... | Traceback (most recent call last):\n File "/g... |
14580 | /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... | Traceback (most recent call last):\n File "/g... |
14789 | /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... | Traceback (most recent call last):\n File "/g... |
Save built catalog to disk#
b.save(
'/glade/scratch/mgrover/test-cmip6-catalog.csv',
path_column_name='path',
variable_column_name='variable_id',
data_format='netcdf',
groupby_attrs=[
'activity_id',
'institution_id',
'source_id',
'experiment_id',
'table_id',
'grid_label',
],
aggregations=[
{'type': 'union', 'attribute_name': 'variable_id'},
{
'type': 'join_existing',
'attribute_name': 'time_range',
'options': {'dim': 'time', 'coords': 'minimal', 'compat': 'override'},
},
{
'type': 'join_new',
'attribute_name': 'member_id',
'options': {'coords': 'minimal', 'compat': 'override'},
},
],
)
Saved catalog location: /glade/scratch/mgrover/test-cmip6-catalog.json and /glade/scratch/mgrover/test-cmip6-catalog.csv
/glade/u/home/mgrover/miniconda3/envs/cesm2-marbl/lib/python3.7/site-packages/ipykernel_launcher.py:24: UserWarning: Unable to parse 8 assets/files. A list of these assets can be found in /glade/scratch/mgrover/invalid_assets_test-cmip6-catalog.csv.
!cat /glade/scratch/mgrover/test-cmip6-catalog.json
{
"catalog_file": "test-cmip6-catalog.csv",
"attributes": [
{
"column_name": "activity_id",
"vocabulary": ""
},
{
"column_name": "branch_method",
"vocabulary": ""
},
{
"column_name": "branch_time_in_child",
"vocabulary": ""
},
{
"column_name": "branch_time_in_parent",
"vocabulary": ""
},
{
"column_name": "experiment",
"vocabulary": ""
},
{
"column_name": "experiment_id",
"vocabulary": ""
},
{
"column_name": "frequency",
"vocabulary": ""
},
{
"column_name": "grid",
"vocabulary": ""
},
{
"column_name": "grid_label",
"vocabulary": ""
},
{
"column_name": "institution_id",
"vocabulary": ""
},
{
"column_name": "nominal_resolution",
"vocabulary": ""
},
{
"column_name": "parent_activity_id",
"vocabulary": ""
},
{
"column_name": "parent_experiment_id",
"vocabulary": ""
},
{
"column_name": "parent_source_id",
"vocabulary": ""
},
{
"column_name": "parent_time_units",
"vocabulary": ""
},
{
"column_name": "parent_variant_label",
"vocabulary": ""
},
{
"column_name": "product",
"vocabulary": ""
},
{
"column_name": "realm",
"vocabulary": ""
},
{
"column_name": "source_id",
"vocabulary": ""
},
{
"column_name": "source_type",
"vocabulary": ""
},
{
"column_name": "sub_experiment",
"vocabulary": ""
},
{
"column_name": "sub_experiment_id",
"vocabulary": ""
},
{
"column_name": "table_id",
"vocabulary": ""
},
{
"column_name": "variable_id",
"vocabulary": ""
},
{
"column_name": "variant_label",
"vocabulary": ""
},
{
"column_name": "member_id",
"vocabulary": ""
},
{
"column_name": "standard_name",
"vocabulary": ""
},
{
"column_name": "long_name",
"vocabulary": ""
},
{
"column_name": "units",
"vocabulary": ""
},
{
"column_name": "vertical_levels",
"vocabulary": ""
},
{
"column_name": "init_year",
"vocabulary": ""
},
{
"column_name": "start_time",
"vocabulary": ""
},
{
"column_name": "end_time",
"vocabulary": ""
},
{
"column_name": "time_range",
"vocabulary": ""
},
{
"column_name": "path",
"vocabulary": ""
},
{
"column_name": "version",
"vocabulary": ""
}
],
"assets": {
"column_name": "path",
"format": "netcdf"
},
"aggregation_control": {
"variable_column_name": "variable_id",
"groupby_attrs": [
"activity_id",
"institution_id",
"source_id",
"experiment_id",
"table_id",
"grid_label"
],
"aggregations": [
{
"type": "union",
"attribute_name": "variable_id",
"options": null
},
{
"type": "join_existing",
"attribute_name": "time_range",
"options": {
"dim": "time",
"coords": "minimal",
"compat": "override"
}
},
{
"type": "join_new",
"attribute_name": "member_id",
"options": {
"coords": "minimal",
"compat": "override"
}
}
]
},
"esmcat_version": "0.0.1",
"id": null,
"description": null,
"last_updated": "2021-06-07T15:03:05+00:00"
}