Coverage for hdl_registers/parser/parser.py: 98%
248 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-02 20:54 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-02 20:54 +0000
# --------------------------------------------------------------------------------------------------
# Copyright (c) Lukas Vik. All rights reserved.
#
# This file is part of the hdl-registers project, an HDL register generator fast enough to run
# in real time.
# https://hdl-registers.com
# https://github.com/hdl-registers/hdl-registers
# --------------------------------------------------------------------------------------------------
10from __future__ import annotations
12import copy
13import json
14from enum import Enum
15from typing import TYPE_CHECKING, Any, ClassVar
17import tomli_w
18import yaml
19from tsfpga import DEFAULT_FILE_ENCODING
21from hdl_registers.about import WEBSITE_URL
22from hdl_registers.constant.bit_vector_constant import UnsignedVector
23from hdl_registers.field.numerical_interpretation import (
24 Signed,
25 SignedFixedPoint,
26 Unsigned,
27 UnsignedFixedPoint,
28)
29from hdl_registers.register_list import RegisterList
30from hdl_registers.register_modes import REGISTER_MODES
32if TYPE_CHECKING:
33 from pathlib import Path
35 from hdl_registers.register import Register
36 from hdl_registers.register_mode import RegisterMode
39class RegisterParser:
40 """
41 Parse register data in the form of a dictionary into a :class:`.RegisterList` object.
42 See :ref:`toml_format` for further documentation.
44 A note on sanity check strategy:
45 The parser performs only the basic sanity checks related to the data file format.
46 For example, missing properties, unknown properties, etc.
47 A lot of other sanity checks are performed in the :class:`.Register`, :class:`.RegisterArray`,
48 :class:`.RegisterField`, etc, classes themselves.
50 For example, the default value of a bit field should be a string with the value "0" or "1".
51 This is checked in the constructor of the :class:`.Bit` class, not here in the parser.
52 Similar for a lot of other things.
54 This is because these objects can be created from the Python API also, without involving
55 the parser.
56 Hence these sanity checks have to be present there.
57 Having them also in the parser would enable better error messages, but would be redundant
58 and would slow down the parser.
59 Since the parser is run in real time, the performance is critical, and we can not afford
60 to slow it down.
61 """
63 # Attributes of the constant.
64 recognized_constant_items: ClassVar = {"type", "value", "description", "data_type"}
65 # Note that "type" being present is implied.
66 # We would not be parsing a constant unless "type" equal to "constant" was seen.
67 # So we save some CPU cycles by not checking for it.
68 required_constant_items: ClassVar = ["value"]
70 # Attributes of the register.
71 # Anything apart from these are names of fields.
72 default_register_items: ClassVar = {
73 "type",
74 "mode",
75 "description",
76 }
77 # While a 'mode' is required for a register, it may NOT be specified/changed in the data file
78 # for a default register.
79 # Hence this property is handled separately.
80 # And hence, registers have no required items.
82 # Attributes of the register array.
83 # Anything apart from these are names of registers.
84 default_register_array_items: ClassVar = {"type", "array_length", "description"}
85 # Note that "type" being present is implied.
86 # We would not be parsing a register array unless "type" equal to "register_array" was seen.
87 # So we save some CPU cycles by not checking for it.
88 required_register_array_items: ClassVar = ["array_length"]
90 # Attributes of the "bit" register field.
91 recognized_bit_items: ClassVar = {"type", "description", "default_value"}
92 # Note that "type" being present is implied.
93 # We would not be parsing a bit unless "type" equal to "bit" was seen.
94 # So we save some CPU cycles by not checking for it.
95 required_bit_items: ClassVar[list[str]] = []
97 # Attributes of the "bit_vector" register field.
98 recognized_bit_vector_items: ClassVar = {
99 "type",
100 "description",
101 "width",
102 "default_value",
103 "numerical_interpretation",
104 "min_bit_index",
105 }
106 # Note that "type" being present is implied.
107 # We would not be parsing a bit vector unless "type" equal to "bit_vector" was seen.
108 # So we save some CPU cycles by not checking for it.
109 required_bit_vector_items: ClassVar = ["width"]
111 # The "numerical_interpretation" property of a "bit_vector" field may take only these values.
112 class _RecognizedBitVectorNumericalInterpretationItems(Enum):
113 UNSIGNED = "unsigned"
114 SIGNED = "signed"
115 UNSIGNED_FIXED_POINT = "unsigned_fixed_point"
116 SIGNED_FIXED_POINT = "signed_fixed_point"
118 # Attributes of the "enumeration" register field.
119 recognized_enumeration_items: ClassVar = {"type", "description", "default_value", "element"}
120 # Note that "type" being present is implied.
121 # We would not be parsing an enumeration unless "type" equal to "enumeration" was seen.
122 # So we save some CPU cycles by not checking for it.
123 required_enumeration_items: ClassVar = ["element"]
125 # Attributes of the "integer" register field.
126 recognized_integer_items: ClassVar = {
127 "type",
128 "description",
129 "min_value",
130 "max_value",
131 "default_value",
132 }
133 # Note that "type" being present is implied.
134 # We would not be parsing an integer unless "type" equal to "integer" was seen.
135 # So we save some CPU cycles by not checking for it.
136 required_integer_items: ClassVar = ["max_value"]
138 def __init__(
139 self,
140 name: str,
141 source_definition_file: Path,
142 default_registers: list[Register] | None = None,
143 ) -> None:
144 """
145 Arguments:
146 name: The name of the register list.
147 source_definition_file: The source file that defined this register list.
148 Will be displayed in generated source code and documentation
149 for traceability.
150 default_registers: List of default registers.
151 Note that this list with :class:`.Register` objects will be deep copied, so you can
152 use the same list many times without worrying about mutability.
153 """
154 self._register_list = RegisterList(name=name, source_definition_file=source_definition_file)
155 self._source_definition_file = source_definition_file
157 self._default_register_names = []
158 if default_registers:
159 # Perform deep copy of the mutable register objects.
160 self._register_list.register_objects = copy.deepcopy(default_registers)
161 for register in default_registers:
162 self._default_register_names.append(register.name)
164 def parse(self, register_data: dict[str, Any]) -> RegisterList:
165 """
166 Parse the register data.
168 Arguments:
169 register_data: Register data as a dictionary.
170 Preferably read by the :func:`.from_toml`, :func:`.from_json` or
171 :func:`.from_yaml` functions.
173 Return:
174 The resulting register list.
175 """
176 for old_top_level_key_name in ["constant", "register", "register_array"]:
177 if old_top_level_key_name in register_data:
178 source_file = self._source_definition_file
179 output_file = (
180 source_file.parent.resolve()
181 / f"{source_file.stem}_version_6_format{source_file.suffix}"
182 )
184 print(
185 f"""
186ERROR: Parsing register data that appears to be in the old pre-6.0.0 format.
187ERROR: For more information, see: {WEBSITE_URL}/rst/about/new_data_file_format.html
188ERROR: Your data will be automatically converted to the new format and saved to: {output_file}
189ERROR: Please inspect that file and update your data file to the new format.
190"""
191 )
192 _save_to_new_format(old_data=register_data, output_file=output_file)
193 raise ValueError("Found register data in old format. See message above.")
195 parser_methods = {
196 "constant": self._parse_constant,
197 "register": self._parse_plain_register,
198 "register_array": self._parse_register_array,
199 }
201 for top_level_name, top_level_items in register_data.items():
202 if not isinstance(top_level_items, dict):
203 message = (
204 f"Error while parsing {self._source_definition_file}: "
205 f'Got unknown top-level property "{top_level_name}".'
206 )
207 # Seems to the linter like a type error, but it is actually the user specifying
208 # a property/value that they shouldn't.
209 # Corresponds better to a 'ValueError' than a 'TypeError'.
210 raise ValueError(message) # noqa: TRY004
212 top_level_type = top_level_items.get("type", "register")
214 if top_level_type not in parser_methods:
215 valid_types_str = ", ".join(f'"{parser_key}"' for parser_key in parser_methods)
216 message = (
217 f'Error while parsing "{top_level_name}" in {self._source_definition_file}: '
218 f'Got unknown type "{top_level_type}". Expected one of {valid_types_str}.'
219 )
220 raise ValueError(message)
222 parser_methods[top_level_type](name=top_level_name, items=top_level_items)
224 return self._register_list
226 def _parse_constant(self, name: str, items: dict[str, Any]) -> None:
227 for item_name in self.required_constant_items:
228 if item_name not in items:
229 message = (
230 f'Error while parsing constant "{name}" in {self._source_definition_file}: '
231 f'Missing required property "{item_name}".'
232 )
233 raise ValueError(message)
235 for item_name in items:
236 if item_name not in self.recognized_constant_items:
237 message = (
238 f'Error while parsing constant "{name}" in {self._source_definition_file}: '
239 f'Got unknown property "{item_name}".'
240 )
241 raise ValueError(message)
243 value = items["value"]
244 description = items.get("description", "")
245 data_type_str = items.get("data_type")
247 if data_type_str is not None:
248 if not isinstance(value, str):
249 raise ValueError(
250 f'Error while parsing constant "{name}" in '
251 f"{self._source_definition_file}: "
252 'May not set "data_type" for non-string constant.'
253 )
255 if data_type_str == "unsigned":
256 value = UnsignedVector(value)
257 else:
258 raise ValueError(
259 f'Error while parsing constant "{name}" in '
260 f"{self._source_definition_file}: "
261 f'Invalid data type "{data_type_str}".'
262 )
264 self._register_list.add_constant(name=name, value=value, description=description)
266 def _parse_plain_register(self, name: str, items: dict[str, Any]) -> None:
267 description = items.get("description", "")
269 if name in self._default_register_names:
270 # Default registers can be "updated" in the sense that the user can set a custom
271 # 'description' and add whatever fields they want in the current register list.
272 # They may not, however, change the 'mode' which is part of the default definition.
273 if "mode" in items:
274 message = (
275 f'Error while parsing register "{name}" in {self._source_definition_file}: '
276 'A "mode" may not be specified for a default register.'
277 )
278 raise ValueError(message)
280 register = self._register_list.get_register(register_name=name)
281 register.description = description
283 else:
284 # If it is a new register however, the 'mode' has to be specified.
285 if "mode" not in items:
286 message = (
287 f'Error while parsing register "{name}" in {self._source_definition_file}: '
288 f'Missing required property "mode".'
289 )
290 raise ValueError(message)
292 mode = self._get_mode(mode_name=items["mode"], register_name=name)
294 register = self._register_list.append_register(
295 name=name, mode=mode, description=description
296 )
298 self._parse_register_fields(register=register, register_items=items, register_array_note="")
300 def _get_mode(self, mode_name: str, register_name: str) -> RegisterMode:
301 if mode_name in REGISTER_MODES:
302 return REGISTER_MODES[mode_name]
304 valid_modes_str = ", ".join(f'"{mode_key}"' for mode_key in REGISTER_MODES)
305 message = (
306 f'Error while parsing register "{register_name}" in {self._source_definition_file}: '
307 f'Got unknown mode "{mode_name}". Expected one of {valid_modes_str}.'
308 )
309 raise ValueError(message)
311 def _parse_register_fields(
312 self,
313 register_items: dict[str, Any],
314 register: Register,
315 register_array_note: str,
316 ) -> None:
317 # Add any fields that are specified.
318 for item_name, item_value in register_items.items():
319 # Skip default items so we only get the fields.
320 if item_name in self.default_register_items:
321 continue
323 if not isinstance(item_value, dict):
324 message = (
325 f'Error while parsing register "{register.name}"{register_array_note} '
326 f"in {self._source_definition_file}: "
327 f'Got unknown property "{item_name}".'
328 )
329 # Seems to the linter like a type error, but it is actually the user specifying
330 # a property/value that they shouldn't.
331 # Corresponds better to a 'ValueError' than a 'TypeError'.
332 raise ValueError(message) # noqa: TRY004
334 if "type" not in item_value:
335 message = (
336 f'Error while parsing field "{item_name}" in register '
337 f'"{register.name}"{register_array_note} in {self._source_definition_file}: '
338 'Missing required property "type".'
339 )
340 raise ValueError(message)
342 field_type = item_value["type"]
344 parser_methods = {
345 "bit": self._parse_bit,
346 "bit_vector": self._parse_bit_vector,
347 "enumeration": self._parse_enumeration,
348 "integer": self._parse_integer,
349 }
351 if field_type not in parser_methods:
352 valid_types_str = ", ".join(f'"{parser_key}"' for parser_key in parser_methods)
353 message = (
354 f'Error while parsing field "{item_name}" in register '
355 f'"{register.name}"{register_array_note} in {self._source_definition_file}: '
356 f'Unknown field type "{field_type}". Expected one of {valid_types_str}.'
357 )
358 raise ValueError(message)
360 parser_methods[field_type](
361 register=register,
362 field_name=item_name,
363 field_items=item_value,
364 register_array_note=register_array_note,
365 )
367 def _parse_register_array(self, name: str, items: dict[str, Any]) -> None:
368 for required_property in self.required_register_array_items:
369 if required_property not in items:
370 message = (
371 f'Error while parsing register array "{name}" in '
372 f"{self._source_definition_file}: "
373 f'Missing required property "{required_property}".'
374 )
375 raise ValueError(message)
377 register_array_length = items["array_length"]
378 register_array_description = items.get("description", "")
379 register_array = self._register_list.append_register_array(
380 name=name, length=register_array_length, description=register_array_description
381 )
383 # Add all registers that are specified.
384 found_at_least_one_register = False
385 for item_name, item_value in items.items():
386 # Skip default items so we only get the registers.
387 if item_name in self.default_register_array_items:
388 continue
390 found_at_least_one_register = True
392 if not isinstance(item_value, dict):
393 message = (
394 f'Error while parsing register array "{name}" in '
395 f"{self._source_definition_file}: "
396 f'Got unknown property "{item_name}".'
397 )
398 # Seems to the linter like a type error, but it is actually the user specifying
399 # a property/value that they shouldn't.
400 # Corresponds better to a 'ValueError' than a 'TypeError'.
401 raise ValueError(message) # noqa: TRY004
403 item_type = item_value.get("type", "register")
404 if item_type != "register":
405 message = (
406 f'Error while parsing register "{item_name}" within array "{name}" in '
407 f"{self._source_definition_file}: "
408 f'Got unknown type "{item_type}". Expected "register".'
409 )
410 raise ValueError(message)
412 # A 'mode' is semi-required for plain registers, but always required for
413 # array registers.
414 if "mode" not in item_value:
415 raise ValueError(
416 f'Error while parsing register "{item_name}" within array "{name}" in '
417 f"{self._source_definition_file}: "
418 f'Missing required property "mode".'
419 )
420 register_mode = self._get_mode(mode_name=item_value["mode"], register_name=item_name)
422 register_description = item_value.get("description", "")
424 register = register_array.append_register(
425 name=item_name, mode=register_mode, description=register_description
426 )
428 self._parse_register_fields(
429 register_items=item_value,
430 register=register,
431 register_array_note=f' within array "{name}"',
432 )
434 if not found_at_least_one_register:
435 message = (
436 f'Error while parsing register array "{name}" in {self._source_definition_file}: '
437 "Array must contain at least one register."
438 )
439 raise ValueError(message)
441 def _check_field_items(
442 self,
443 register_name: str,
444 field_name: str,
445 field_items: dict[str, Any],
446 recognized_items: set[str],
447 required_items: list[str],
448 register_array_note: str,
449 ) -> None:
450 """
451 Will raise exception if anything is wrong.
452 """
453 for item_name in required_items:
454 if item_name not in field_items:
455 message = (
456 f'Error while parsing field "{field_name}" in register '
457 f'"{register_name}"{register_array_note} in {self._source_definition_file}: '
458 f'Missing required property "{item_name}".'
459 )
460 raise ValueError(message)
462 for item_name in field_items:
463 if item_name not in recognized_items:
464 message = (
465 f'Error while parsing field "{field_name}" in register '
466 f'"{register_name}"{register_array_note} in {self._source_definition_file}: '
467 f'Unknown property "{item_name}".'
468 )
469 raise ValueError(message)
471 def _parse_bit(
472 self,
473 register: Register,
474 field_name: str,
475 field_items: dict[str, Any],
476 register_array_note: str,
477 ) -> None:
478 self._check_field_items(
479 register_name=register.name,
480 field_name=field_name,
481 field_items=field_items,
482 recognized_items=self.recognized_bit_items,
483 required_items=self.required_bit_items,
484 register_array_note=register_array_note,
485 )
487 description = field_items.get("description", "")
488 default_value = field_items.get("default_value", "0")
490 register.append_bit(name=field_name, description=description, default_value=default_value)
492 def _parse_bit_vector(
493 self,
494 register: Register,
495 field_name: str,
496 field_items: dict[str, Any],
497 register_array_note: str,
498 ) -> None:
499 self._check_field_items(
500 register_name=register.name,
501 field_name=field_name,
502 field_items=field_items,
503 recognized_items=self.recognized_bit_vector_items,
504 required_items=self.required_bit_vector_items,
505 register_array_note=register_array_note,
506 )
508 width = field_items["width"]
510 description = field_items.get("description", "")
511 default_value = field_items.get("default_value", 0)
513 min_bit_index = field_items.get("min_bit_index", 0)
514 max_bit_index = min_bit_index + width - 1
516 numerical_interpretation_str = field_items.get("numerical_interpretation", "unsigned")
517 match numerical_interpretation_str:
518 case self._RecognizedBitVectorNumericalInterpretationItems.UNSIGNED.value:
519 numerical_interpretation = Unsigned(bit_width=width)
520 case self._RecognizedBitVectorNumericalInterpretationItems.SIGNED.value:
521 numerical_interpretation = Signed(bit_width=width)
522 case self._RecognizedBitVectorNumericalInterpretationItems.UNSIGNED_FIXED_POINT.value:
523 numerical_interpretation = UnsignedFixedPoint(
524 max_bit_index=max_bit_index, min_bit_index=min_bit_index
525 )
526 case self._RecognizedBitVectorNumericalInterpretationItems.SIGNED_FIXED_POINT.value:
527 numerical_interpretation = SignedFixedPoint(
528 max_bit_index=max_bit_index, min_bit_index=min_bit_index
529 )
530 case _:
531 valid_interpretations_str = ", ".join(
532 [
533 f'"{interpretation.value}"'
534 for interpretation in self._RecognizedBitVectorNumericalInterpretationItems
535 ]
536 )
537 message = (
538 f'Error while parsing field "{field_name}" in register '
539 f'"{register.name}" in {self._source_definition_file}: '
540 f'Unknown value "{numerical_interpretation_str}" for '
541 'property "numerical_interpretation". '
542 f"Expected one of {valid_interpretations_str}."
543 )
544 raise ValueError(message)
546 register.append_bit_vector(
547 name=field_name,
548 description=description,
549 width=width,
550 default_value=default_value,
551 numerical_interpretation=numerical_interpretation,
552 )
554 def _parse_enumeration(
555 self,
556 register: Register,
557 field_name: str,
558 field_items: dict[str, Any],
559 register_array_note: str,
560 ) -> None:
561 self._check_field_items(
562 register_name=register.name,
563 field_name=field_name,
564 field_items=field_items,
565 recognized_items=self.recognized_enumeration_items,
566 # Check that we have at least one element.
567 # This is checked also in the Enumeration class, which is needed if the user
568 # is working directly with the Python API.
569 # That is where we usually sanity check, to avoid duplication.
570 # However, this particular check is needed here also since the logic for default
571 # value below does not work if there are no elements.
572 required_items=self.required_enumeration_items,
573 register_array_note=register_array_note,
574 )
576 description = field_items.get("description", "")
577 # We assert above that the enumeration has at least one element.
578 # Meaning that the result of this get can not be None.
579 elements: dict[str, str] = field_items.get("element")
581 # The default "default value" is the first declared enumeration element.
582 # Note that this works because dictionaries in Python are guaranteed ordered since
583 # Python 3.7.
584 default_value = field_items.get("default_value", next(iter(elements)))
586 register.append_enumeration(
587 name=field_name,
588 description=description,
589 elements=elements,
590 default_value=default_value,
591 )
593 def _parse_integer(
594 self,
595 register: Register,
596 field_name: str,
597 field_items: dict[str, Any],
598 register_array_note: str,
599 ) -> None:
600 self._check_field_items(
601 register_name=register.name,
602 field_name=field_name,
603 field_items=field_items,
604 recognized_items=self.recognized_integer_items,
605 required_items=self.required_integer_items,
606 register_array_note=register_array_note,
607 )
609 max_value = field_items["max_value"]
611 description = field_items.get("description", "")
612 min_value = field_items.get("min_value", 0)
613 default_value = field_items.get("default_value", min_value)
615 register.append_integer(
616 name=field_name,
617 description=description,
618 min_value=min_value,
619 max_value=max_value,
620 default_value=default_value,
621 )
624def _convert_to_new_format( # noqa: C901
625 old_data: dict[str, Any],
626) -> dict[str, Any]:
627 """
628 Convert pre-6.0.0 format to the new format.
629 This is a semi-trash function that will be removed in the future.
630 """
632 def _get_register_dict(register_items: dict[str, Any]) -> dict[str, Any]:
633 register_dict = {}
635 for register_item_name, register_item_value in register_items.items():
636 if register_item_name in RegisterParser.default_register_items:
637 register_dict[register_item_name] = register_item_value
639 elif register_item_name in ["bit", "bit_vector", "enumeration", "integer"]:
640 for field_name, field_items in register_item_value.items():
641 field_dict = {"type": register_item_name}
642 field_dict.update(dict(field_items.items()))
644 register_dict[field_name] = field_dict
646 else:
647 raise ValueError(
648 f"Unknown item {register_item_name}. Looks like an error in the user data file."
649 )
651 return register_dict
653 result = {}
655 def _add_item(name: str, items: dict[str, Any]) -> None:
656 if name in result:
657 raise ValueError(f"Duplicate item {name}")
659 result[name] = items
661 if "register" in old_data:
662 for register_name, register_items in old_data["register"].items():
663 register_dict = _get_register_dict(register_items=register_items)
664 _add_item(name=register_name, items=register_dict)
666 if "register_array" in old_data:
667 for register_array_name, register_array_items in old_data["register_array"].items():
668 register_array_dict: dict[str, Any] = {"type": "register_array"}
670 for register_array_item_name, register_array_item_value in register_array_items.items():
671 if register_array_item_name in RegisterParser.default_register_array_items:
672 register_array_dict[register_array_item_name] = register_array_item_value
674 elif register_array_item_name == "register":
675 for register_name, register_items in register_array_item_value.items():
676 register_array_dict[register_name] = _get_register_dict(
677 register_items=register_items
678 )
680 else:
681 raise ValueError(
682 f"Unknown item {register_array_item_name}. "
683 "Looks like an error in the user data file."
684 )
686 _add_item(name=register_array_name, items=register_array_dict)
688 if "constant" in old_data:
689 for constant_name, constant_items in old_data["constant"].items():
690 constant_dict = {"type": "constant"}
691 constant_dict.update(dict(constant_items.items()))
693 _add_item(name=constant_name, items=constant_dict)
695 return result
def _save_to_new_format(old_data: dict[str, Any], output_file: Path) -> None:
    """
    Convert the old-format data to the new format and write it to file.
    The serialization format (TOML, JSON or YAML) is selected by the suffix of the output file.

    Arguments:
        old_data: Register data in the pre-6.0.0 format.
        output_file: The file to write. Suffix must be ".toml", ".json" or ".yaml".

    Raises:
        ValueError: If the output file has any other suffix.
            Note that the conversion is performed before the suffix is inspected.
    """
    converted = _convert_to_new_format(old_data=old_data)
    suffix = output_file.suffix

    if suffix == ".toml":
        # The TOML writer emits bytes, hence binary mode.
        with output_file.open("wb") as toml_handle:
            tomli_w.dump(converted, toml_handle, multiline_strings=True)

    elif suffix == ".json":
        with output_file.open("w", encoding=DEFAULT_FILE_ENCODING) as json_handle:
            json.dump(converted, json_handle, indent=4)

    elif suffix == ".yaml":
        with output_file.open("w", encoding=DEFAULT_FILE_ENCODING) as yaml_handle:
            yaml.dump(converted, yaml_handle)

    else:
        raise ValueError(f"Unknown file format {output_file}")