Coverage for hdl_registers/parser/parser.py: 97%
229 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-30 20:52 +0000
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-30 20:52 +0000
1# --------------------------------------------------------------------------------------------------
2# Copyright (c) Lukas Vik. All rights reserved.
3#
4# This file is part of the hdl-registers project, an HDL register generator fast enough to run
5# in real time.
6# https://hdl-registers.com
7# https://github.com/hdl-registers/hdl-registers
8# --------------------------------------------------------------------------------------------------
10from __future__ import annotations
12import copy
13import json
14from typing import TYPE_CHECKING, Any, ClassVar
16import tomli_w
17import yaml
18from tsfpga import DEFAULT_FILE_ENCODING
20from hdl_registers.about import WEBSITE_URL
21from hdl_registers.constant.bit_vector_constant import UnsignedVector
22from hdl_registers.register_list import RegisterList
23from hdl_registers.register_modes import REGISTER_MODES
25if TYPE_CHECKING:
26 from pathlib import Path
28 from hdl_registers.register import Register
29 from hdl_registers.register_mode import RegisterMode
class RegisterParser:
    """
    Parse register data in the form of a dictionary into a :class:`.RegisterList` object.
    See :ref:`toml_format` for further documentation.

    A note on sanity check strategy:
    The parser performs only the basic sanity checks related to the data file format.
    For example, missing properties, unknown properties, etc.
    A lot of other sanity checks are performed in the :class:`.Register`, :class:`.RegisterArray`,
    :class:`.RegisterField`, etc, classes themselves.

    For example, the default value of a bit field should be a string with the value "0" or "1".
    This is checked in the constructor of the :class:`.Bit` class, not here in the parser.
    Similar for a lot of other things.

    This is because these objects can be created from the Python API also, without involving
    the parser.
    Hence these sanity checks have to be present there.
    Having them also in the parser would enable better error messages, but would be redundant
    and would slow down the parser.
    Since the parser is run in real time, the performance is critical, and we can not afford
    to slow it down.
    """

    # Attributes of the constant.
    recognized_constant_items: ClassVar = {"type", "value", "description", "data_type"}
    # Note that "type" being present is implied. We would not be parsing a constant unless we
    # know it to be a "constant" type.
    # So we save some CPU cycles by not checking for it.
    required_constant_items: ClassVar = ["value"]

    # Attributes of the register.
    # Anything apart from these are names of fields.
    default_register_items: ClassVar = {
        "type",
        "mode",
        "description",
    }
    # While a 'mode' is required for a register, it may NOT be specified/changed in the data file
    # for a default register.
    # Hence this property is handled separately.
    # And hence, registers have no required items.

    # Attributes of the register array.
    # Anything apart from these are names of registers.
    default_register_array_items: ClassVar = {"type", "array_length", "description"}
    # Note that "type" being present is implied.
    # We would not be parsing a register array unless we know it to be a "register_array" type.
    # So we save some CPU cycles by not checking for it.
    required_register_array_items: ClassVar = ["array_length"]

    # Attributes of the "bit" register field.
    recognized_bit_items: ClassVar = {"type", "description", "default_value"}
    # Note that "type" being present is implied.
    # We would not be parsing a bit unless we know it to be a "bit" type.
    # So we save some CPU cycles by not checking for it.
    required_bit_items: ClassVar[list[str]] = []

    # Attributes of the "bit_vector" register field.
    recognized_bit_vector_items: ClassVar = {"type", "description", "width", "default_value"}
    # Note that "type" being present is implied.
    # We would not be parsing a bit_vector unless we know it to be a "bit_vector" type.
    # So we save some CPU cycles by not checking for it.
    required_bit_vector_items: ClassVar = ["width"]

    # Attributes of the "enumeration" register field.
    recognized_enumeration_items: ClassVar = {"type", "description", "default_value", "element"}
    # Note that "type" being present is implied.
    # We would not be parsing a enumeration unless we know it to be a "enumeration" type.
    # So we save some CPU cycles by not checking for it.
    required_enumeration_items: ClassVar = ["element"]

    # Attributes of the "integer" register field.
    recognized_integer_items: ClassVar = {
        "type",
        "description",
        "min_value",
        "max_value",
        "default_value",
    }
    # Note that "type" being present is implied.
    # We would not be parsing a integer unless we know it to be a "integer" type.
    # So we save some CPU cycles by not checking for it.
    required_integer_items: ClassVar = ["max_value"]

    def __init__(
        self,
        name: str,
        source_definition_file: Path,
        default_registers: list[Register] | None = None,
    ) -> None:
        """
        Arguments:
            name: The name of the register list.
            source_definition_file: The source file that defined this register list.
                Will be displayed in generated source code and documentation
                for traceability.
            default_registers: List of default registers.
                Note that this list with :class:`.Register` objects will be deep copied, so you can
                use the same list many times without worrying about mutability.
        """
        self._register_list = RegisterList(name=name, source_definition_file=source_definition_file)
        self._source_definition_file = source_definition_file

        # Names of the default registers, used to tell "update of a default register" apart
        # from "declaration of a new register" when parsing plain registers.
        self._default_register_names: list[str] = []
        if default_registers:
            # Perform deep copy of the mutable register objects.
            self._register_list.register_objects = copy.deepcopy(default_registers)
            self._default_register_names = [register.name for register in default_registers]

    def parse(self, register_data: dict[str, Any]) -> RegisterList:
        """
        Parse the register data.

        Arguments:
            register_data: Register data as a dictionary.
                Preferably read by the :func:`.from_toml`, :func:`.from_json` or
                :func:`.from_yaml` functions.

        Return:
            The resulting register list.

        Raises:
            ValueError: If the data is in the old pre-6.0.0 format, or if a top-level item is
                not a dictionary or has an unknown "type".
                The item-specific parsers raise the same exception type for their own checks.
        """
        for old_top_level_key_name in ["constant", "register", "register_array"]:
            if old_top_level_key_name in register_data:
                source_file = self._source_definition_file
                output_file = (
                    source_file.parent.resolve()
                    / f"{source_file.stem}_version_6_format{source_file.suffix}"
                )

                print(
                    f"""
ERROR: Parsing register data that appears to be in the old pre-6.0.0 format.
ERROR: For more information, see: {WEBSITE_URL}/rst/about/new_data_file_format.html
ERROR: Your data will be automatically converted to the new format and saved to: {output_file}
ERROR: Please inspect that file and update your data file to the new format.
"""
                )
                _save_to_new_format(old_data=register_data, output_file=output_file)
                raise ValueError("Found register data in old format. See message above.")

        parser_methods = {
            "constant": self._parse_constant,
            "register": self._parse_plain_register,
            "register_array": self._parse_register_array,
        }

        for top_level_name, top_level_items in register_data.items():
            if not isinstance(top_level_items, dict):
                message = (
                    f"Error while parsing {self._source_definition_file}: "
                    f'Got unknown top-level property "{top_level_name}".'
                )
                # Seems to the linter like a type error, but it is actually the user specifying
                # a property/value that they shouldn't.
                # Corresponds better to a 'ValueError' than a 'TypeError'.
                raise ValueError(message)  # noqa: TRY004

            # An item without an explicit "type" is treated as a plain register.
            top_level_type = top_level_items.get("type", "register")

            if top_level_type not in parser_methods:
                valid_types_str = ", ".join(f'"{parser_key}"' for parser_key in parser_methods)
                message = (
                    f'Error while parsing "{top_level_name}" in {self._source_definition_file}: '
                    f'Got unknown type "{top_level_type}". Expected one of {valid_types_str}.'
                )
                raise ValueError(message)

            parser_methods[top_level_type](name=top_level_name, items=top_level_items)

        return self._register_list

    def _parse_constant(self, name: str, items: dict[str, Any]) -> None:
        """
        Check the properties of a constant and add it to the register list.

        Raises:
            ValueError: On missing/unknown property, if "data_type" is set for a non-string
                value, or if the "data_type" is not recognized.
        """
        for item_name in self.required_constant_items:
            if item_name not in items:
                message = (
                    f'Error while parsing constant "{name}" in {self._source_definition_file}: '
                    f'Missing required property "{item_name}".'
                )
                raise ValueError(message)

        for item_name in items:
            if item_name not in self.recognized_constant_items:
                message = (
                    f'Error while parsing constant "{name}" in {self._source_definition_file}: '
                    f'Got unknown property "{item_name}".'
                )
                raise ValueError(message)

        value = items["value"]
        description = items.get("description", "")
        data_type_str = items.get("data_type")

        if data_type_str is not None:
            if not isinstance(value, str):
                raise ValueError(
                    f'Error while parsing constant "{name}" in '
                    f"{self._source_definition_file}: "
                    'May not set "data_type" for non-string constant.'
                )

            if data_type_str == "unsigned":
                value = UnsignedVector(value)
            else:
                raise ValueError(
                    f'Error while parsing constant "{name}" in '
                    f"{self._source_definition_file}: "
                    f'Invalid data type "{data_type_str}".'
                )

        self._register_list.add_constant(name=name, value=value, description=description)

    def _parse_plain_register(self, name: str, items: dict[str, Any]) -> None:
        """
        Parse a register that is not part of a register array.
        Either updates an existing default register or appends a new register, and then
        parses the fields of the register.

        Raises:
            ValueError: If "mode" is specified for a default register, or if it is missing
                for a new register.
        """
        if name in self._default_register_names:
            # Default registers can be "updated" in the sense that the user can set a custom
            # 'description' and add whatever fields they want in the current register list.
            # They may not, however, change the 'mode' which is part of the default definition.
            if "mode" in items:
                message = (
                    f'Error while parsing register "{name}" in {self._source_definition_file}: '
                    'A "mode" may not be specified for a default register.'
                )
                raise ValueError(message)

            register = self._register_list.get_register(register_name=name)
            # Only override the description when the user actually provides one.
            # Otherwise a user that merely adds fields to a default register would silently
            # wipe the description that is part of the default definition.
            if "description" in items:
                register.description = items["description"]

        else:
            # If it is a new register however, the 'mode' has to be specified.
            if "mode" not in items:
                message = (
                    f'Error while parsing register "{name}" in {self._source_definition_file}: '
                    f'Missing required property "mode".'
                )
                raise ValueError(message)

            mode = self._get_mode(mode_name=items["mode"], register_name=name)

            register = self._register_list.append_register(
                name=name, mode=mode, description=items.get("description", "")
            )

        self._parse_register_fields(register=register, register_items=items, register_array_note="")

    def _get_mode(self, mode_name: str, register_name: str) -> RegisterMode:
        """
        Look up the register mode object that corresponds to the mode name.

        Raises:
            ValueError: If the mode name is not a key in ``REGISTER_MODES``.
        """
        if mode_name in REGISTER_MODES:
            return REGISTER_MODES[mode_name]

        valid_modes_str = ", ".join(f'"{mode_key}"' for mode_key in REGISTER_MODES)
        message = (
            f'Error while parsing register "{register_name}" in {self._source_definition_file}: '
            f'Got unknown mode "{mode_name}". Expected one of {valid_modes_str}.'
        )
        raise ValueError(message)

    def _parse_register_fields(
        self,
        register_items: dict[str, Any],
        register: Register,
        register_array_note: str,
    ) -> None:
        """
        Add all fields that are specified in the register items to the register.

        Arguments:
            register_items: All the items of the register, including the non-field ones.
            register: The register object that the fields shall be appended to.
            register_array_note: Text that is inserted in error messages after the register name.
                Empty for plain registers.

        Raises:
            ValueError: If an item is not a dictionary, lacks "type", or has an unknown "type".
        """
        # Built once, outside of the loop, since it is identical for every field.
        parser_methods = {
            "bit": self._parse_bit,
            "bit_vector": self._parse_bit_vector,
            "enumeration": self._parse_enumeration,
            "integer": self._parse_integer,
        }

        # Add any fields that are specified.
        for item_name, item_value in register_items.items():
            # Skip default items so we only get the fields.
            if item_name in self.default_register_items:
                continue

            if not isinstance(item_value, dict):
                message = (
                    f'Error while parsing register "{register.name}"{register_array_note} '
                    f"in {self._source_definition_file}: "
                    f'Got unknown property "{item_name}".'
                )
                # Seems to the linter like a type error, but it is actually the user specifying
                # a property/value that they shouldn't.
                # Corresponds better to a 'ValueError' than a 'TypeError'.
                raise ValueError(message)  # noqa: TRY004

            if "type" not in item_value:
                message = (
                    f'Error while parsing field "{item_name}" in register '
                    f'"{register.name}"{register_array_note} in {self._source_definition_file}: '
                    'Missing required property "type".'
                )
                raise ValueError(message)

            field_type = item_value["type"]

            if field_type not in parser_methods:
                valid_types_str = ", ".join(f'"{parser_key}"' for parser_key in parser_methods)
                message = (
                    f'Error while parsing field "{item_name}" in register '
                    f'"{register.name}"{register_array_note} in {self._source_definition_file}: '
                    f'Unknown field type "{field_type}". Expected one of {valid_types_str}.'
                )
                raise ValueError(message)

            parser_methods[field_type](
                register=register, field_name=item_name, field_items=item_value
            )

    def _parse_register_array(self, name: str, items: dict[str, Any]) -> None:
        """
        Append a register array to the register list and parse all registers within it.

        Raises:
            ValueError: On missing "array_length", if a contained item is not a register
                dictionary, if a contained register lacks "mode", or if the array contains
                no register at all.
        """
        for required_property in self.required_register_array_items:
            if required_property not in items:
                message = (
                    f'Error while parsing register array "{name}" in '
                    f"{self._source_definition_file}: "
                    f'Missing required property "{required_property}".'
                )
                raise ValueError(message)

        register_array_length = items["array_length"]
        register_array_description = items.get("description", "")
        register_array = self._register_list.append_register_array(
            name=name, length=register_array_length, description=register_array_description
        )

        # Add all registers that are specified.
        found_at_least_one_register = False
        for item_name, item_value in items.items():
            # Skip default items so we only get the registers.
            if item_name in self.default_register_array_items:
                continue

            found_at_least_one_register = True

            if not isinstance(item_value, dict):
                message = (
                    f'Error while parsing register array "{name}" in '
                    f"{self._source_definition_file}: "
                    f'Got unknown property "{item_name}".'
                )
                # Seems to the linter like a type error, but it is actually the user specifying
                # a property/value that they shouldn't.
                # Corresponds better to a 'ValueError' than a 'TypeError'.
                raise ValueError(message)  # noqa: TRY004

            item_type = item_value.get("type", "register")
            if item_type != "register":
                message = (
                    f'Error while parsing register "{item_name}" within array "{name}" in '
                    f"{self._source_definition_file}: "
                    f'Got unknown type "{item_type}". Expected "register".'
                )
                raise ValueError(message)

            # A 'mode' is semi-required for plain registers, but always required for
            # array registers.
            if "mode" not in item_value:
                raise ValueError(
                    f'Error while parsing register "{item_name}" within array "{name}" in '
                    f"{self._source_definition_file}: "
                    f'Missing required property "mode".'
                )
            register_mode = self._get_mode(mode_name=item_value["mode"], register_name=item_name)

            register_description = item_value.get("description", "")

            register = register_array.append_register(
                name=item_name, mode=register_mode, description=register_description
            )

            self._parse_register_fields(
                register_items=item_value,
                register=register,
                register_array_note=f' within array "{name}"',
            )

        if not found_at_least_one_register:
            message = (
                f'Error while parsing register array "{name}" in {self._source_definition_file}: '
                "Array must contain at least one register."
            )
            raise ValueError(message)

    def _check_field_items(
        self,
        register_name: str,
        field_name: str,
        field_items: dict[str, Any],
        recognized_items: set[str],
        required_items: list[str],
    ) -> None:
        """
        Will raise exception if anything is wrong.

        Raises:
            ValueError: If a required property is missing from the field items, or if the field
                items contain a property that is not recognized.
        """
        for item_name in required_items:
            if item_name not in field_items:
                message = (
                    f'Error while parsing field "{field_name}" in register "{register_name}" in '
                    f"{self._source_definition_file}: "
                    f'Missing required property "{item_name}".'
                )
                raise ValueError(message)

        for item_name in field_items:
            if item_name not in recognized_items:
                message = (
                    f'Error while parsing field "{field_name}" in register '
                    f'"{register_name}" in {self._source_definition_file}: '
                    f'Unknown property "{item_name}".'
                )
                raise ValueError(message)

    def _parse_bit(self, register: Register, field_name: str, field_items: dict[str, Any]) -> None:
        """
        Check the properties of a "bit" field and append it to the register.
        """
        self._check_field_items(
            register_name=register.name,
            field_name=field_name,
            field_items=field_items,
            recognized_items=self.recognized_bit_items,
            required_items=self.required_bit_items,
        )

        description = field_items.get("description", "")
        default_value = field_items.get("default_value", "0")

        register.append_bit(name=field_name, description=description, default_value=default_value)

    def _parse_bit_vector(
        self, register: Register, field_name: str, field_items: dict[str, Any]
    ) -> None:
        """
        Check the properties of a "bit_vector" field and append it to the register.
        """
        self._check_field_items(
            register_name=register.name,
            field_name=field_name,
            field_items=field_items,
            recognized_items=self.recognized_bit_vector_items,
            required_items=self.required_bit_vector_items,
        )

        width = field_items["width"]

        description = field_items.get("description", "")
        default_value = field_items.get("default_value", 0)

        register.append_bit_vector(
            name=field_name, description=description, width=width, default_value=default_value
        )

    def _parse_enumeration(
        self, register: Register, field_name: str, field_items: dict[str, Any]
    ) -> None:
        """
        Check the properties of an "enumeration" field and append it to the register.

        Raises:
            ValueError: On missing/unknown property, or if the "element" property is empty.
        """
        self._check_field_items(
            register_name=register.name,
            field_name=field_name,
            field_items=field_items,
            recognized_items=self.recognized_enumeration_items,
            required_items=self.required_enumeration_items,
        )

        description = field_items.get("description", "")
        # The required-items check above guarantees that the key is present.
        elements: dict[str, str] = field_items["element"]

        # Check that we have at least one element.
        # This is checked also in the Enumeration class, which is needed if the user
        # is working directly with the Python API.
        # That is where we usually sanity check, to avoid duplication.
        # However, this particular check is needed here also since the logic for default
        # value below does not work if there are no elements: It would leak a
        # bare 'StopIteration' to the user instead of a descriptive error.
        if not elements:
            message = (
                f'Error while parsing field "{field_name}" in register "{register.name}" in '
                f"{self._source_definition_file}: "
                'Property "element" must contain at least one enumeration element.'
            )
            raise ValueError(message)

        # The default "default value" is the first declared enumeration element.
        # Note that this works because dictionaries in Python are guaranteed ordered since
        # Python 3.7.
        if "default_value" in field_items:
            default_value = field_items["default_value"]
        else:
            default_value = next(iter(elements))

        register.append_enumeration(
            name=field_name,
            description=description,
            elements=elements,
            default_value=default_value,
        )

    def _parse_integer(
        self, register: Register, field_name: str, field_items: dict[str, Any]
    ) -> None:
        """
        Check the properties of an "integer" field and append it to the register.
        """
        self._check_field_items(
            register_name=register.name,
            field_name=field_name,
            field_items=field_items,
            recognized_items=self.recognized_integer_items,
            required_items=self.required_integer_items,
        )

        max_value = field_items["max_value"]

        description = field_items.get("description", "")
        min_value = field_items.get("min_value", 0)
        # Unless specified, the default value is the minimum value.
        default_value = field_items.get("default_value", min_value)

        register.append_integer(
            name=field_name,
            description=description,
            min_value=min_value,
            max_value=max_value,
            default_value=default_value,
        )
540def _convert_to_new_format( # noqa: C901
541 old_data: dict[str, Any],
542) -> dict[str, Any]:
543 """
544 Convert pre-6.0.0 format to the new format.
545 This is a semi-trash function that will be removed in the future.
546 """
548 def _get_register_dict(register_items: dict[str, Any]) -> dict[str, Any]:
549 register_dict = {}
551 for register_item_name, register_item_value in register_items.items():
552 if register_item_name in RegisterParser.default_register_items:
553 register_dict[register_item_name] = register_item_value
555 elif register_item_name in ["bit", "bit_vector", "enumeration", "integer"]:
556 for field_name, field_items in register_item_value.items():
557 field_dict = {"type": register_item_name}
558 field_dict.update(dict(field_items.items()))
560 register_dict[field_name] = field_dict
562 else:
563 raise ValueError(
564 f"Unknown item {register_item_name}. Looks like an error in the user data file."
565 )
567 return register_dict
569 result = {}
571 def _add_item(name: str, items: dict[str, Any]) -> None:
572 if name in result:
573 raise ValueError(f"Duplicate item {name}")
575 result[name] = items
577 if "register" in old_data:
578 for register_name, register_items in old_data["register"].items():
579 register_dict = _get_register_dict(register_items=register_items)
580 _add_item(name=register_name, items=register_dict)
582 if "register_array" in old_data:
583 for register_array_name, register_array_items in old_data["register_array"].items():
584 register_array_dict: dict[str, Any] = {"type": "register_array"}
586 for register_array_item_name, register_array_item_value in register_array_items.items():
587 if register_array_item_name in RegisterParser.default_register_array_items:
588 register_array_dict[register_array_item_name] = register_array_item_value
590 elif register_array_item_name == "register":
591 for register_name, register_items in register_array_item_value.items():
592 register_array_dict[register_name] = _get_register_dict(
593 register_items=register_items
594 )
596 else:
597 raise ValueError(
598 f"Unknown item {register_array_item_name}. "
599 "Looks like an error in the user data file."
600 )
602 _add_item(name=register_array_name, items=register_array_dict)
604 if "constant" in old_data:
605 for constant_name, constant_items in old_data["constant"].items():
606 constant_dict = {"type": "constant"}
607 constant_dict.update(dict(constant_items.items()))
609 _add_item(name=constant_name, items=constant_dict)
611 return result
def _save_to_new_format(old_data: dict[str, Any], output_file: Path) -> None:
    """
    Convert register data in the old pre-6.0.0 format to the new format and save it to file.
    The serialization format is chosen from the suffix of the output file.

    Arguments:
        old_data: Register data in the old format.
        output_file: The file that will be written.
            Suffix must be ".toml", ".json", ".yaml" or ".yml".

    Raises:
        ValueError: If the file suffix is not one of the supported ones.
            Note that conversion errors are raised before the suffix is inspected.
    """
    new_data = _convert_to_new_format(old_data=old_data)
    suffix = output_file.suffix

    if suffix == ".toml":
        with output_file.open("wb") as file_handle:
            tomli_w.dump(new_data, file_handle, multiline_strings=True)

        return

    if suffix == ".json":
        with output_file.open("w", encoding=DEFAULT_FILE_ENCODING) as file_handle:
            json.dump(new_data, file_handle, indent=4)

        return

    if suffix in (".yaml", ".yml"):
        with output_file.open("w", encoding=DEFAULT_FILE_ENCODING) as file_handle:
            # The order of the items is significant in the data file: Registers, fields and
            # enumeration elements are added in the order they are declared.
            # PyYAML sorts mapping keys alphabetically unless told otherwise, which would
            # scramble the converted file. The 'sort_keys' argument needs PyYAML 5.1 or later.
            yaml.dump(new_data, file_handle, sort_keys=False)

        return

    raise ValueError(f"Unknown file format {output_file}")