Skip to content

Dataset Config

TOML Example

# Example dataset registration config: one [dataset] table plus one
# sub-table per nested metadata group.
[dataset]
# Identity and versioning.
name = "dsg_rpm_2_v2.1"
# NOTE(review): dataset_id is not among the DatasetCreate attributes listed
# in the schema section of this page; confirm the consumer accepts it.
dataset_id = "load-profiles"
version = "2.1"
version_status = "Active"

# Format and layout of the stored data.
data_format = "parquet"
schema_info = "columns: scenario_year, timestamp, rs_number, rpm_bus_num, total_electricity_kwh"
units = ["kWh"]

# Coverage of the dataset.
weather_years = [2012]
model_years = [
    2020,
    2025,
    2030,
    2035,
    2040,
    2045,
]
scenarios = [
    "demand_high",
    "demand_moderate",
    "demand_stress",
]
sensitivities = []
# NOTE(review): geographic_extent is likewise absent from the listed
# DatasetCreate attributes; confirm.
geographic_extent = "RS-A"

# Free-text metadata.
# The leading spaces on each line and the two spaces before the closing
# delimiter are part of the string value; only the newline right after the
# opening delimiter is trimmed by TOML.
description = """
      run 2 dsgrid handoff to rpm with climate-adjusted rld and cld loads.
      version 2.1 rs-level disaggregated loads for all load types with 8% assumed losses;
      v2.1 has fixed cld to RS node allocations
  """
comments = "long_format with scenario_year for each scenario and year"

# External references.
# e.g., an external transformation script
relevant_links = ["https://testlink1.com"]
resource_url = "https://api.hpc.nrel.gov/esif/docs/repo.html"

# Where the dataset lives on the data system.
[dataset.location]
system = "ESIFRepoAPI"
description = "API docs can be found here https://api.hpc.nrel.gov/esif/docs/repo.html"
url = "https://esif.hpc.nrel.gov/esif/api/repo/files"
project = "1d3562e0-2a60-4e0a-9496-1032ee4b7db6"
dataset = "7732b2af-f8fa-48c4-a0d9-a618ca3c374f"
keyword = ""
tag = []
classification = []
ids = []

# The person who registered this dataset.
[dataset.registration_author]
# NOTE(review): username is not among the listed UserCreate attributes; confirm.
username = "mmooney"
first_name = "Meghan"
last_name = "Mooney"
email = "Meghan.Mooney@nrel.gov"

# Source code related to the data.
[dataset.source_code]
# e.g. github url
location = "https://github.com/NREL/mycode/"
branch = "test"

# Extent and fidelity of the dataset in time.
[dataset.temporal_info]
extent = "8760 for all project model years"
fidelity = "hourly"

# Extent and fidelity of the dataset in space.
[dataset.spatial_info]
extent = "LA City"
fidelity = "receiving station level (nodal within the city)"

Schema Definition

schemas

DatasetCreate

Dataset Checkin Schema

Attributes:

Name Type Description
comments str
data_format str | None
description str
display_name str | None
hash_value str
location dict
model_years list[int]
name str
other dict
previous_version str | None
registration_author UserCreate
relevant_links list[str]
resource_url str
scenarios list[str]
schema_info str | None
sensitivities list[str]
source_code SourceCode
spatial_info SpatialInfo
temporal_info TemporalInfo
units list[str]
version str
version_status VersionStatus
weather_years list[int]

comments = Field(title='comments', default='', description='Registration comments about this dataset')

data_format = Field(title='data_format', default=None, description='data format, or a list of formats separated by commas')

description = Field(title='description', description='The description of the scheduled dataset', default='')

display_name = Field(title='display_name', default=None, description='The dataset display name')

hash_value = Field(title='hash_value', default='', description='The hash value of this dataset used for integrity check.')

location = Field(title='location', description='The dataset location on data system')

model_years = Field(title='model_years', default=[], description='The model year(s) of the dataset')

name = Field(title='name', description='A short name')

other = Field(title='other', default={}, description='other metadata info about the dataset')

previous_version = Field(title='name of previous version of dataset path', default=None, description='Previous version of this dataset')

registration_author = Field(title='registration_author', description='The person who registered this dataset')

resource_url = Field(title='resource_url', default='', description='The resource URL for this dataset')

scenarios = Field(title='scenarios', description='The list of scenario names the dataset relates to')

schema_info = Field(title='schema_info', default='', description='The schema description of the dataset')

sensitivities = Field(title='sensitivities', default=[], description='The sensitivities of the dataset')

source_code = Field(title='source_code', description='The source code that produces the dataset')

spatial_info = Field(title='spatial_info', default={}, description='The spatial metadata of the dataset')

temporal_info = Field(title='temporal_info', default={}, description='The temporal metadata of the dataset')

units = Field(title='units', default=[], description='The units of the dataset')

version = Field(title='version', description='Dataset version')

version_status = Field(title='version_status', description='Dataset version status')

weather_years = Field(title='weather_years', default=[], description='The weather year(s) of the dataset')

SpatialInfo

Dataset spatial information

Attributes:

Name Type Description
extent str
fidelity str
other dict

extent = Field(title='extent', default='', description='The spatial extent of the dataset')

fidelity = Field(title='fidelity', default='', description='The fidelity of the dataset in space')

other = Field(title='other', default={}, description='other info about spatial characteristics of data')

TemporalInfo

Dataset temporal information

Attributes:

Name Type Description
extent str
fidelity str
other dict

extent = Field(title='extent', default='', description='The temporal extent of the dataset')

fidelity = Field(title='fidelity', default='', description='The fidelity of the dataset in time')

other = Field(title='other', default={}, description='other info about temporal characteristics of data')

VersionStatus

Attributes:

Name Type Description
Active
Inactivate
Unresolved

Active = 'Active'

Inactivate = 'Inactivate'

Unresolved = 'Unresolved'

schemas

UserCreate

User base model

Attributes:

Name Type Description
email EmailStr
first_name str | None
last_name str | None
organization str | None

email = Field(title='email', to_lower=True, description='Email address')

first_name = Field(title='first_name', default=None, description='First name')

last_name = Field(title='last_name', default=None, description='Last name')

organization = Field(title='organization', default=None, description='Organization name')

schemas

SourceCode

Source Model Schema

Attributes:

Name Type Description
branch str | None
image str | None
location str
tag str | None

branch = Field(title='branch', default='', description='The git branch of source code')

image = Field(title='image', default='', description='The location of container image')

location = Field(title='location', description='The location of the source code')

tag = Field(title='tag', default='', description='The git tag of source code')