{ "cells": [ { "cell_type": "markdown", "id": "experimental-reality", "metadata": {}, "source": [ "# Build a catalog for CMIP6 CMorized output" ] }, { "cell_type": "markdown", "id": "imperial-radio", "metadata": {}, "source": [ "## Import packages" ] }, { "cell_type": "code", "execution_count": 1, "id": "solid-connection", "metadata": {}, "outputs": [], "source": [ "from ecgtools import Builder\n", "from ecgtools.parsers import parse_cmip6" ] }, { "cell_type": "markdown", "id": "resistant-indicator", "metadata": {}, "source": [ "## Instantiate a `Builder` object" ] }, { "cell_type": "code", "execution_count": 2, "id": "isolated-glossary", "metadata": {}, "outputs": [], "source": [ "b = Builder([\"/glade/collections/cmip/CMIP6/CFMIP/\"], depth=3, njobs=-1)" ] }, { "cell_type": "markdown", "id": "decreased-object", "metadata": {}, "source": [ "## Build catalog and inspect built catalog\n", "Here we use the CMIP6 parser!" ] }, { "cell_type": "code", "execution_count": 3, "id": "fundamental-supervisor", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 36 concurrent workers.\n", "[Parallel(n_jobs=-1)]: Done 3 out of 24 | elapsed: 1.2s remaining: 8.2s\n", "[Parallel(n_jobs=-1)]: Done 8 out of 24 | elapsed: 1.2s remaining: 2.5s\n", "[Parallel(n_jobs=-1)]: Done 13 out of 24 | elapsed: 1.3s remaining: 1.1s\n", "[Parallel(n_jobs=-1)]: Done 18 out of 24 | elapsed: 1.5s remaining: 0.5s\n", "[Parallel(n_jobs=-1)]: Done 24 out of 24 | elapsed: 2.3s finished\n", "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 36 concurrent workers.\n", "[Parallel(n_jobs=-1)]: Done 90 tasks | elapsed: 1.6s\n", "[Parallel(n_jobs=-1)]: Done 216 tasks | elapsed: 2.1s\n", "[Parallel(n_jobs=-1)]: Done 504 tasks | elapsed: 3.1s\n", "[Parallel(n_jobs=-1)]: Done 1296 tasks | elapsed: 6.4s\n", "[Parallel(n_jobs=-1)]: Done 2232 tasks | elapsed: 10.4s\n", "[Parallel(n_jobs=-1)]: Done 3312 tasks | 
elapsed: 14.5s\n", "[Parallel(n_jobs=-1)]: Done 4536 tasks | elapsed: 19.2s\n", "[Parallel(n_jobs=-1)]: Done 5904 tasks | elapsed: 24.4s\n", "[Parallel(n_jobs=-1)]: Done 7416 tasks | elapsed: 30.5s\n", "[Parallel(n_jobs=-1)]: Done 9072 tasks | elapsed: 36.6s\n", "[Parallel(n_jobs=-1)]: Done 10872 tasks | elapsed: 42.5s\n", "[Parallel(n_jobs=-1)]: Done 12816 tasks | elapsed: 49.2s\n", "[Parallel(n_jobs=-1)]: Done 14904 tasks | elapsed: 56.7s\n", "[Parallel(n_jobs=-1)]: Done 17136 tasks | elapsed: 1.1min\n", "[Parallel(n_jobs=-1)]: Done 17912 out of 17912 | elapsed: 1.1min finished\n", "/glade/work/mgrover/git_repos/ecgtools/ecgtools/builder.py:180: UserWarning: Unable to parse 8 assets/files. A list of these assets can be found in `.invalid_assets` attribute.\n", " parsing_func, parsing_func_kwargs\n" ] }, { "data": { "text/plain": [ "Builder(root_path=PosixPath('/glade/collections/cmip/CMIP6/CFMIP'), extension='.nc', depth=3, exclude_patterns=None, njobs=-1)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "b.build(parse_cmip6)" ] }, { "cell_type": "code", "execution_count": 4, "id": "eleven-touch", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
activity_idbranch_methodbranch_time_in_childbranch_time_in_parentexperimentexperiment_idfrequencygridgrid_labelinstitution_id...standard_namelong_nameunitsvertical_levelsinit_yearstart_timeend_timetime_rangepathversion
0CFMIPno parent0.00.0An AGCM experiment with monthly-varying SSTs a...piSST-4xCO2monnative 0.9x1.25 finite volume grid (192x288 la...gnNCAR...relative_humidityRelative Humidity%32.0NaN0001-01-15 12:00:000030-12-15 12:00:000001-01-15 12:00:00-0030-12-15 12:00:00/glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2...v0
1CFMIPno parent0.00.0An AGCM experiment with monthly-varying SSTs a...piSST-4xCO2monnative 0.9x1.25 finite volume grid (192x288 la...gnNCAR...relative_humidityRelative Humidity%32.0NaN0001-01-15 12:00:000030-12-15 12:00:000001-01-15 12:00:00-0030-12-15 12:00:00/glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2...v20200209
2CFMIPno parent0.00.0An AGCM experiment with monthly-varying SSTs a...piSST-4xCO2monnative 0.9x1.25 finite volume grid (192x288 la...gnNCAR...tendency_of_air_temperature_due_to_advectionTendency of Air Temperature Due to AdvectionK s-132.0NaN0001-01-15 12:00:000030-12-15 12:00:000001-01-15 12:00:00-0030-12-15 12:00:00/glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2...v0
3CFMIPno parent0.00.0An AGCM experiment with monthly-varying SSTs a...piSST-4xCO2monnative 0.9x1.25 finite volume grid (192x288 la...gnNCAR...tendency_of_air_temperature_due_to_advectionTendency of Air Temperature Due to AdvectionK s-132.0NaN0001-01-15 12:00:000030-12-15 12:00:000001-01-15 12:00:00-0030-12-15 12:00:00/glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2...v20200209
4CFMIPno parent0.00.0An AGCM experiment with monthly-varying SSTs a...piSST-4xCO2monnative 0.9x1.25 finite volume grid (192x288 la...gnNCAR...cloud_area_fractionCALIPSO Total Cloud Cover Percentage%1.0NaN0001-01-15 12:00:000030-12-15 12:00:000001-01-15 12:00:00-0030-12-15 12:00:00/glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2...v0
\n", "

5 rows × 36 columns

\n", "
" ], "text/plain": [ " activity_id branch_method branch_time_in_child branch_time_in_parent \\\n", "0 CFMIP no parent 0.0 0.0 \n", "1 CFMIP no parent 0.0 0.0 \n", "2 CFMIP no parent 0.0 0.0 \n", "3 CFMIP no parent 0.0 0.0 \n", "4 CFMIP no parent 0.0 0.0 \n", "\n", " experiment experiment_id frequency \\\n", "0 An AGCM experiment with monthly-varying SSTs a... piSST-4xCO2 mon \n", "1 An AGCM experiment with monthly-varying SSTs a... piSST-4xCO2 mon \n", "2 An AGCM experiment with monthly-varying SSTs a... piSST-4xCO2 mon \n", "3 An AGCM experiment with monthly-varying SSTs a... piSST-4xCO2 mon \n", "4 An AGCM experiment with monthly-varying SSTs a... piSST-4xCO2 mon \n", "\n", " grid grid_label \\\n", "0 native 0.9x1.25 finite volume grid (192x288 la... gn \n", "1 native 0.9x1.25 finite volume grid (192x288 la... gn \n", "2 native 0.9x1.25 finite volume grid (192x288 la... gn \n", "3 native 0.9x1.25 finite volume grid (192x288 la... gn \n", "4 native 0.9x1.25 finite volume grid (192x288 la... gn \n", "\n", " institution_id ... standard_name \\\n", "0 NCAR ... relative_humidity \n", "1 NCAR ... relative_humidity \n", "2 NCAR ... tendency_of_air_temperature_due_to_advection \n", "3 NCAR ... tendency_of_air_temperature_due_to_advection \n", "4 NCAR ... 
cloud_area_fraction \n", "\n", " long_name units vertical_levels \\\n", "0 Relative Humidity % 32.0 \n", "1 Relative Humidity % 32.0 \n", "2 Tendency of Air Temperature Due to Advection K s-1 32.0 \n", "3 Tendency of Air Temperature Due to Advection K s-1 32.0 \n", "4 CALIPSO Total Cloud Cover Percentage % 1.0 \n", "\n", " init_year start_time end_time \\\n", "0 NaN 0001-01-15 12:00:00 0030-12-15 12:00:00 \n", "1 NaN 0001-01-15 12:00:00 0030-12-15 12:00:00 \n", "2 NaN 0001-01-15 12:00:00 0030-12-15 12:00:00 \n", "3 NaN 0001-01-15 12:00:00 0030-12-15 12:00:00 \n", "4 NaN 0001-01-15 12:00:00 0030-12-15 12:00:00 \n", "\n", " time_range \\\n", "0 0001-01-15 12:00:00-0030-12-15 12:00:00 \n", "1 0001-01-15 12:00:00-0030-12-15 12:00:00 \n", "2 0001-01-15 12:00:00-0030-12-15 12:00:00 \n", "3 0001-01-15 12:00:00-0030-12-15 12:00:00 \n", "4 0001-01-15 12:00:00-0030-12-15 12:00:00 \n", "\n", " path version \n", "0 /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... v0 \n", "1 /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... v20200209 \n", "2 /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... v0 \n", "3 /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... v20200209 \n", "4 /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... v0 \n", "\n", "[5 rows x 36 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "b.df.head()" ] }, { "cell_type": "code", "execution_count": 6, "id": "mighty-recommendation", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
INVALID_ASSETTRACEBACK
6372/glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2...Traceback (most recent call last):\\n File \"/g...
6373/glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2...Traceback (most recent call last):\\n File \"/g...
6374/glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2...Traceback (most recent call last):\\n File \"/g...
6583/glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2...Traceback (most recent call last):\\n File \"/g...
14578/glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2...Traceback (most recent call last):\\n File \"/g...
14579/glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2...Traceback (most recent call last):\\n File \"/g...
14580/glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2...Traceback (most recent call last):\\n File \"/g...
14789/glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2...Traceback (most recent call last):\\n File \"/g...
\n", "
" ], "text/plain": [ " INVALID_ASSET \\\n", "6372 /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... \n", "6373 /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... \n", "6374 /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... \n", "6583 /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... \n", "14578 /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... \n", "14579 /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... \n", "14580 /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... \n", "14789 /glade/collections/cmip/CMIP6/CFMIP/NCAR/CESM2... \n", "\n", " TRACEBACK \n", "6372 Traceback (most recent call last):\\n File \"/g... \n", "6373 Traceback (most recent call last):\\n File \"/g... \n", "6374 Traceback (most recent call last):\\n File \"/g... \n", "6583 Traceback (most recent call last):\\n File \"/g... \n", "14578 Traceback (most recent call last):\\n File \"/g... \n", "14579 Traceback (most recent call last):\\n File \"/g... \n", "14580 Traceback (most recent call last):\\n File \"/g... \n", "14789 Traceback (most recent call last):\\n File \"/g... " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "b.invalid_assets" ] }, { "cell_type": "markdown", "id": "average-conditions", "metadata": {}, "source": [ "## Save built catalog to disk" ] }, { "cell_type": "code", "execution_count": 8, "id": "optical-burton", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Saved catalog location: /glade/scratch/mgrover/test-cmip6-catalog.json and /glade/scratch/mgrover/test-cmip6-catalog.csv\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/glade/u/home/mgrover/miniconda3/envs/cesm2-marbl/lib/python3.7/site-packages/ipykernel_launcher.py:24: UserWarning: Unable to parse 8 assets/files. 
A list of these assets can be found in /glade/scratch/mgrover/invalid_assets_test-cmip6-catalog.csv.\n" ] } ], "source": [ "b.save(\n", " '/glade/scratch/mgrover/test-cmip6-catalog.csv',\n", " path_column_name='path',\n", " variable_column_name='variable_id',\n", " data_format='netcdf',\n", " groupby_attrs=[\n", " 'activity_id',\n", " 'institution_id',\n", " 'source_id',\n", " 'experiment_id',\n", " 'table_id',\n", " 'grid_label',\n", " ],\n", " aggregations=[\n", " {'type': 'union', 'attribute_name': 'variable_id'},\n", " {\n", " 'type': 'join_existing',\n", " 'attribute_name': 'time_range',\n", " 'options': {'dim': 'time', 'coords': 'minimal', 'compat': 'override'},\n", " },\n", " {\n", " 'type': 'join_new',\n", " 'attribute_name': 'member_id',\n", " 'options': {'coords': 'minimal', 'compat': 'override'},\n", " },\n", " ],\n", ")" ] }, { "cell_type": "code", "execution_count": 9, "id": "lesbian-quick", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"catalog_file\": \"test-cmip6-catalog.csv\",\n", " \"attributes\": [\n", " {\n", " \"column_name\": \"activity_id\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"branch_method\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"branch_time_in_child\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"branch_time_in_parent\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"experiment\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"experiment_id\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"frequency\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"grid\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"grid_label\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"institution_id\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"nominal_resolution\",\n", " 
\"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"parent_activity_id\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"parent_experiment_id\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"parent_source_id\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"parent_time_units\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"parent_variant_label\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"product\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"realm\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"source_id\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"source_type\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"sub_experiment\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"sub_experiment_id\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"table_id\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"variable_id\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"variant_label\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"member_id\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"standard_name\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"long_name\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"units\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"vertical_levels\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"init_year\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"start_time\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"end_time\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"time_range\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " 
\"column_name\": \"path\",\n", " \"vocabulary\": \"\"\n", " },\n", " {\n", " \"column_name\": \"version\",\n", " \"vocabulary\": \"\"\n", " }\n", " ],\n", " \"assets\": {\n", " \"column_name\": \"path\",\n", " \"format\": \"netcdf\"\n", " },\n", " \"aggregation_control\": {\n", " \"variable_column_name\": \"variable_id\",\n", " \"groupby_attrs\": [\n", " \"activity_id\",\n", " \"institution_id\",\n", " \"source_id\",\n", " \"experiment_id\",\n", " \"table_id\",\n", " \"grid_label\"\n", " ],\n", " \"aggregations\": [\n", " {\n", " \"type\": \"union\",\n", " \"attribute_name\": \"variable_id\",\n", " \"options\": null\n", " },\n", " {\n", " \"type\": \"join_existing\",\n", " \"attribute_name\": \"time_range\",\n", " \"options\": {\n", " \"dim\": \"time\",\n", " \"coords\": \"minimal\",\n", " \"compat\": \"override\"\n", " }\n", " },\n", " {\n", " \"type\": \"join_new\",\n", " \"attribute_name\": \"member_id\",\n", " \"options\": {\n", " \"coords\": \"minimal\",\n", " \"compat\": \"override\"\n", " }\n", " }\n", " ]\n", " },\n", " \"esmcat_version\": \"0.0.1\",\n", " \"id\": null,\n", " \"description\": null,\n", " \"last_updated\": \"2021-06-07T15:03:05+00:00\"\n", "}" ] } ], "source": [ "!cat /glade/scratch/mgrover/test-cmip6-catalog.json" ] }, { "cell_type": "code", "execution_count": null, "id": "beaee726-06ae-4838-bfb9-d17fd331fdbc", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:miniconda3-cesm2-marbl]", "language": "python", "name": "conda-env-miniconda3-cesm2-marbl-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.10" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "state": {}, "version_major": 2, "version_minor": 0 } } }, "nbformat": 4, "nbformat_minor": 5 }