Build a catalog for CMIP6 CMORized output#

Import packages#

from ecgtools import Builder
from ecgtools.parsers import parse_cmip6

Instantiate a Builder object#

# Point the builder at the CFMIP collection. With njobs=-1 the worker count
# is chosen by joblib (36 workers in the run logged below).
b = Builder(
    root_path="/glade/collections/cmip/CMIP6/CFMIP/",
    depth=3,  # presumably the number of directory levels to descend -- confirm in the ecgtools docs
    njobs=-1,
)

Build catalog and inspect built catalog#

Here we use the CMIP6 parser!

# Parse the discovered files with the CMIP6 parser. Files that cannot be
# parsed trigger a UserWarning and are collected in `b.invalid_assets`.
b.build(parse_cmip6)
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 36 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of  24 | elapsed:    1.2s remaining:    8.2s
[Parallel(n_jobs=-1)]: Done   8 out of  24 | elapsed:    1.2s remaining:    2.5s
[Parallel(n_jobs=-1)]: Done  13 out of  24 | elapsed:    1.3s remaining:    1.1s
[Parallel(n_jobs=-1)]: Done  18 out of  24 | elapsed:    1.5s remaining:    0.5s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    2.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 36 concurrent workers.
[Parallel(n_jobs=-1)]: Done  90 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-1)]: Done 216 tasks      | elapsed:    2.1s
[Parallel(n_jobs=-1)]: Done 504 tasks      | elapsed:    3.1s
[Parallel(n_jobs=-1)]: Done 1296 tasks      | elapsed:    6.4s
[Parallel(n_jobs=-1)]: Done 2232 tasks      | elapsed:   10.4s
[Parallel(n_jobs=-1)]: Done 3312 tasks      | elapsed:   14.5s
[Parallel(n_jobs=-1)]: Done 4536 tasks      | elapsed:   19.2s
[Parallel(n_jobs=-1)]: Done 5904 tasks      | elapsed:   24.4s
[Parallel(n_jobs=-1)]: Done 7416 tasks      | elapsed:   30.5s
[Parallel(n_jobs=-1)]: Done 9072 tasks      | elapsed:   36.6s
[Parallel(n_jobs=-1)]: Done 10872 tasks      | elapsed:   42.5s
[Parallel(n_jobs=-1)]: Done 12816 tasks      | elapsed:   49.2s
[Parallel(n_jobs=-1)]: Done 14904 tasks      | elapsed:   56.7s
[Parallel(n_jobs=-1)]: Done 17136 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 17912 out of 17912 | elapsed:  1.1min finished
/glade/work/mgrover/git_repos/ecgtools/ecgtools/builder.py:180: UserWarning: Unable to parse 8 assets/files. A list of these assets can be found in `.invalid_assets` attribute.
  parsing_func, parsing_func_kwargs
Builder(root_path=PosixPath('/glade/collections/cmip/CMIP6/CFMIP'), extension='.nc', depth=3, exclude_patterns=None, njobs=-1)
# Preview the first rows of the catalog DataFrame produced by the build step.
b.df.head()
activity_id branch_method branch_time_in_child branch_time_in_parent experiment experiment_id frequency grid grid_label institution_id ... standard_name long_name units vertical_levels init_year start_time end_time time_range path version
0 CFMIP no parent 0.0 0.0 An AGCM experiment with monthly-varying SSTs a... piSST-4xCO2 mon native 0.9x1.25 finite volume grid (192x288 la... gn NCAR ... relative_humidity Relative Humidity % 32.0 NaN 0001-01-15 12:00:00 0030-12-15 12:00:00 0001-01-15 12:00:00-0030-12-15 12:00:00 /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... v0
1 CFMIP no parent 0.0 0.0 An AGCM experiment with monthly-varying SSTs a... piSST-4xCO2 mon native 0.9x1.25 finite volume grid (192x288 la... gn NCAR ... relative_humidity Relative Humidity % 32.0 NaN 0001-01-15 12:00:00 0030-12-15 12:00:00 0001-01-15 12:00:00-0030-12-15 12:00:00 /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... v20200209
2 CFMIP no parent 0.0 0.0 An AGCM experiment with monthly-varying SSTs a... piSST-4xCO2 mon native 0.9x1.25 finite volume grid (192x288 la... gn NCAR ... tendency_of_air_temperature_due_to_advection Tendency of Air Temperature Due to Advection K s-1 32.0 NaN 0001-01-15 12:00:00 0030-12-15 12:00:00 0001-01-15 12:00:00-0030-12-15 12:00:00 /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... v0
3 CFMIP no parent 0.0 0.0 An AGCM experiment with monthly-varying SSTs a... piSST-4xCO2 mon native 0.9x1.25 finite volume grid (192x288 la... gn NCAR ... tendency_of_air_temperature_due_to_advection Tendency of Air Temperature Due to Advection K s-1 32.0 NaN 0001-01-15 12:00:00 0030-12-15 12:00:00 0001-01-15 12:00:00-0030-12-15 12:00:00 /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... v20200209
4 CFMIP no parent 0.0 0.0 An AGCM experiment with monthly-varying SSTs a... piSST-4xCO2 mon native 0.9x1.25 finite volume grid (192x288 la... gn NCAR ... cloud_area_fraction CALIPSO Total Cloud Cover Percentage % 1.0 NaN 0001-01-15 12:00:00 0030-12-15 12:00:00 0001-01-15 12:00:00-0030-12-15 12:00:00 /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... v0

5 rows × 36 columns

# List the files that could not be parsed, each paired with its traceback.
b.invalid_assets
INVALID_ASSET TRACEBACK
6372 /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... Traceback (most recent call last):\n File "/g...
6373 /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... Traceback (most recent call last):\n File "/g...
6374 /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... Traceback (most recent call last):\n File "/g...
6583 /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... Traceback (most recent call last):\n File "/g...
14578 /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... Traceback (most recent call last):\n File "/g...
14579 /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... Traceback (most recent call last):\n File "/g...
14580 /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... Traceback (most recent call last):\n File "/g...
14789 /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... Traceback (most recent call last):\n File "/g...

Save built catalog to disk#

# Write the catalog CSV. As the message printed after this cell shows, a JSON
# descriptor with the same file stem is saved next to it.
b.save(
    "/glade/scratch/mgrover/test-cmip6-catalog.csv",
    path_column_name="path",  # column that holds each asset's file path
    variable_column_name="variable_id",
    data_format="netcdf",
    # The settings below are recorded under "aggregation_control" in the
    # JSON descriptor.
    groupby_attrs=[
        "activity_id",
        "institution_id",
        "source_id",
        "experiment_id",
        "table_id",
        "grid_label",
    ],
    aggregations=[
        {
            "type": "union",
            "attribute_name": "variable_id",
        },
        {
            "type": "join_existing",
            "attribute_name": "time_range",
            "options": {
                "dim": "time",
                "coords": "minimal",
                "compat": "override",
            },
        },
        {
            "type": "join_new",
            "attribute_name": "member_id",
            "options": {
                "coords": "minimal",
                "compat": "override",
            },
        },
    ],
)
Saved catalog location: /glade/scratch/mgrover/test-cmip6-catalog.json and /glade/scratch/mgrover/test-cmip6-catalog.csv
/glade/u/home/mgrover/miniconda3/envs/cesm2-marbl/lib/python3.7/site-packages/ipykernel_launcher.py:24: UserWarning: Unable to parse 8 assets/files. A list of these assets can be found in /glade/scratch/mgrover/invalid_assets_test-cmip6-catalog.csv.
!cat /glade/scratch/abanihi/test-cmip6-catalog.json
{
  "catalog_file": "test-cmip6-catalog.csv",
  "attributes": [
    {
      "column_name": "activity_id",
      "vocabulary": ""
    },
    {
      "column_name": "branch_method",
      "vocabulary": ""
    },
    {
      "column_name": "branch_time_in_child",
      "vocabulary": ""
    },
    {
      "column_name": "branch_time_in_parent",
      "vocabulary": ""
    },
    {
      "column_name": "experiment",
      "vocabulary": ""
    },
    {
      "column_name": "experiment_id",
      "vocabulary": ""
    },
    {
      "column_name": "frequency",
      "vocabulary": ""
    },
    {
      "column_name": "grid",
      "vocabulary": ""
    },
    {
      "column_name": "grid_label",
      "vocabulary": ""
    },
    {
      "column_name": "institution_id",
      "vocabulary": ""
    },
    {
      "column_name": "nominal_resolution",
      "vocabulary": ""
    },
    {
      "column_name": "parent_activity_id",
      "vocabulary": ""
    },
    {
      "column_name": "parent_experiment_id",
      "vocabulary": ""
    },
    {
      "column_name": "parent_source_id",
      "vocabulary": ""
    },
    {
      "column_name": "parent_time_units",
      "vocabulary": ""
    },
    {
      "column_name": "parent_variant_label",
      "vocabulary": ""
    },
    {
      "column_name": "product",
      "vocabulary": ""
    },
    {
      "column_name": "realm",
      "vocabulary": ""
    },
    {
      "column_name": "source_id",
      "vocabulary": ""
    },
    {
      "column_name": "source_type",
      "vocabulary": ""
    },
    {
      "column_name": "sub_experiment",
      "vocabulary": ""
    },
    {
      "column_name": "sub_experiment_id",
      "vocabulary": ""
    },
    {
      "column_name": "table_id",
      "vocabulary": ""
    },
    {
      "column_name": "variable_id",
      "vocabulary": ""
    },
    {
      "column_name": "variant_label",
      "vocabulary": ""
    },
    {
      "column_name": "member_id",
      "vocabulary": ""
    },
    {
      "column_name": "standard_name",
      "vocabulary": ""
    },
    {
      "column_name": "long_name",
      "vocabulary": ""
    },
    {
      "column_name": "units",
      "vocabulary": ""
    },
    {
      "column_name": "vertical_levels",
      "vocabulary": ""
    },
    {
      "column_name": "init_year",
      "vocabulary": ""
    },
    {
      "column_name": "start_time",
      "vocabulary": ""
    },
    {
      "column_name": "end_time",
      "vocabulary": ""
    },
    {
      "column_name": "time_range",
      "vocabulary": ""
    },
    {
      "column_name": "path",
      "vocabulary": ""
    },
    {
      "column_name": "version",
      "vocabulary": ""
    }
  ],
  "assets": {
    "column_name": "path",
    "format": "netcdf"
  },
  "aggregation_control": {
    "variable_column_name": "variable_id",
    "groupby_attrs": [
      "activity_id",
      "institution_id",
      "source_id",
      "experiment_id",
      "table_id",
      "grid_label"
    ],
    "aggregations": [
      {
        "type": "union",
        "attribute_name": "variable_id",
        "options": null
      },
      {
        "type": "join_existing",
        "attribute_name": "time_range",
        "options": {
          "dim": "time",
          "coords": "minimal",
          "compat": "override"
        }
      },
      {
        "type": "join_new",
        "attribute_name": "member_id",
        "options": {
          "coords": "minimal",
          "compat": "override"
        }
      }
    ]
  },
  "esmcat_version": "0.0.1",
  "id": null,
  "description": null,
  "last_updated": "2021-06-07T15:03:05+00:00"
}