Coverage for hdl_registers/parser/parser.py: 97%

229 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-30 20:52 +0000

1# -------------------------------------------------------------------------------------------------- 

2# Copyright (c) Lukas Vik. All rights reserved. 

3# 

4# This file is part of the hdl-registers project, an HDL register generator fast enough to run 

5# in real time. 

6# https://hdl-registers.com 

7# https://github.com/hdl-registers/hdl-registers 

8# -------------------------------------------------------------------------------------------------- 

9 

10from __future__ import annotations 

11 

12import copy 

13import json 

14from typing import TYPE_CHECKING, Any, ClassVar 

15 

16import tomli_w 

17import yaml 

18from tsfpga import DEFAULT_FILE_ENCODING 

19 

20from hdl_registers.about import WEBSITE_URL 

21from hdl_registers.constant.bit_vector_constant import UnsignedVector 

22from hdl_registers.register_list import RegisterList 

23from hdl_registers.register_modes import REGISTER_MODES 

24 

25if TYPE_CHECKING: 

26 from pathlib import Path 

27 

28 from hdl_registers.register import Register 

29 from hdl_registers.register_mode import RegisterMode 

30 

31 

class RegisterParser:
    """
    Parse register data in the form of a dictionary into a :class:`.RegisterList` object.
    See :ref:`toml_format` for further documentation.

    A note on sanity check strategy:
    The parser performs only the basic sanity checks related to the data file format.
    For example, missing properties, unknown properties, etc.
    A lot of other sanity checks are performed in the :class:`.Register`, :class:`.RegisterArray`,
    :class:`.RegisterField`, etc, classes themselves.

    For example, the default value of a bit field should be a string with the value "0" or "1".
    This is checked in the constructor of the :class:`.Bit` class, not here in the parser.
    Similar for a lot of other things.

    This is because these objects can be created from the Python API also, without involving
    the parser.
    Hence these sanity checks have to be present there.
    Having them also in the parser would enable better error messages, but would be redundant
    and would slow down the parser.
    Since the parser is run in real time, the performance is critical, and we can not afford
    to slow it down.
    """

    # Attributes of the constant.
    recognized_constant_items: ClassVar = {"type", "value", "description", "data_type"}
    # Note that "type" being present is implied. We would not be parsing a constant unless we
    # know it to be a "constant" type.
    # So we save some CPU cycles by not checking for it.
    required_constant_items: ClassVar = ["value"]

    # Attributes of the register.
    # Anything apart from these are names of fields.
    default_register_items: ClassVar = {
        "type",
        "mode",
        "description",
    }
    # While a 'mode' is required for a register, it may NOT be specified/changed in the data file
    # for a default register.
    # Hence this property is handled separately.
    # And hence, registers have no required items.

    # Attributes of the register array.
    # Anything apart from these are names of registers.
    default_register_array_items: ClassVar = {"type", "array_length", "description"}
    # Note that "type" being present is implied.
    # We would not be parsing a register array unless we know it to be a "register_array" type.
    # So we save some CPU cycles by not checking for it.
    required_register_array_items: ClassVar = ["array_length"]

    # Attributes of the "bit" register field.
    recognized_bit_items: ClassVar = {"type", "description", "default_value"}
    # Note that "type" being present is implied.
    # We would not be parsing a bit unless we know it to be a "bit" type.
    # So we save some CPU cycles by not checking for it.
    required_bit_items: ClassVar[list[str]] = []

    # Attributes of the "bit_vector" register field.
    recognized_bit_vector_items: ClassVar = {"type", "description", "width", "default_value"}
    # Note that "type" being present is implied.
    # We would not be parsing a bit_vector unless we know it to be a "bit_vector" type.
    # So we save some CPU cycles by not checking for it.
    required_bit_vector_items: ClassVar = ["width"]

    # Attributes of the "enumeration" register field.
    recognized_enumeration_items: ClassVar = {"type", "description", "default_value", "element"}
    # Note that "type" being present is implied.
    # We would not be parsing an enumeration unless we know it to be an "enumeration" type.
    # So we save some CPU cycles by not checking for it.
    required_enumeration_items: ClassVar = ["element"]

    # Attributes of the "integer" register field.
    recognized_integer_items: ClassVar = {
        "type",
        "description",
        "min_value",
        "max_value",
        "default_value",
    }
    # Note that "type" being present is implied.
    # We would not be parsing an integer unless we know it to be an "integer" type.
    # So we save some CPU cycles by not checking for it.
    required_integer_items: ClassVar = ["max_value"]

    def __init__(
        self,
        name: str,
        source_definition_file: Path,
        default_registers: list[Register] | None = None,
    ) -> None:
        """
        Arguments:
            name: The name of the register list.
            source_definition_file: The source file that defined this register list.
                Will be displayed in generated source code and documentation
                for traceability.
            default_registers: List of default registers.
                Note that this list with :class:`.Register` objects will be deep copied, so you can
                use the same list many times without worrying about mutability.
        """
        self._register_list = RegisterList(name=name, source_definition_file=source_definition_file)
        self._source_definition_file = source_definition_file

        self._default_register_names: list[str] = []
        if default_registers:
            # Perform deep copy of the mutable register objects.
            self._register_list.register_objects = copy.deepcopy(default_registers)
            self._default_register_names = [register.name for register in default_registers]

    def parse(self, register_data: dict[str, Any]) -> RegisterList:
        """
        Parse the register data.

        Arguments:
            register_data: Register data as a dictionary.
                Preferably read by the :func:`.from_toml`, :func:`.from_json` or
                :func:`.from_yaml` functions.

        Return:
            The resulting register list.

        Raises:
            ValueError: If the data is in the old pre-6.0.0 format (a converted copy is saved
                next to the source file first), or if any top-level item is malformed.
        """
        for old_top_level_key_name in ["constant", "register", "register_array"]:
            if old_top_level_key_name in register_data:
                source_file = self._source_definition_file
                output_file = (
                    source_file.parent.resolve()
                    / f"{source_file.stem}_version_6_format{source_file.suffix}"
                )

                print(
                    f"""
ERROR: Parsing register data that appears to be in the old pre-6.0.0 format.
ERROR: For more information, see: {WEBSITE_URL}/rst/about/new_data_file_format.html
ERROR: Your data will be automatically converted to the new format and saved to: {output_file}
ERROR: Please inspect that file and update your data file to the new format.
"""
                )
                _save_to_new_format(old_data=register_data, output_file=output_file)
                raise ValueError("Found register data in old format. See message above.")

        parser_methods = {
            "constant": self._parse_constant,
            "register": self._parse_plain_register,
            "register_array": self._parse_register_array,
        }

        for top_level_name, top_level_items in register_data.items():
            if not isinstance(top_level_items, dict):
                message = (
                    f"Error while parsing {self._source_definition_file}: "
                    f'Got unknown top-level property "{top_level_name}".'
                )
                # Seems to the linter like a type error, but it is actually the user specifying
                # a property/value that they shouldn't.
                # Corresponds better to a 'ValueError' than a 'TypeError'.
                raise ValueError(message)  # noqa: TRY004

            # An item without explicit "type" is a plain register.
            top_level_type = top_level_items.get("type", "register")

            if top_level_type not in parser_methods:
                valid_types_str = ", ".join(f'"{parser_key}"' for parser_key in parser_methods)
                message = (
                    f'Error while parsing "{top_level_name}" in {self._source_definition_file}: '
                    f'Got unknown type "{top_level_type}". Expected one of {valid_types_str}.'
                )
                raise ValueError(message)

            parser_methods[top_level_type](name=top_level_name, items=top_level_items)

        return self._register_list

    def _parse_constant(self, name: str, items: dict[str, Any]) -> None:
        """
        Check the properties of a constant and add it to the register list.
        Will raise ``ValueError`` on missing/unknown properties or an invalid "data_type".
        """
        for item_name in self.required_constant_items:
            if item_name not in items:
                message = (
                    f'Error while parsing constant "{name}" in {self._source_definition_file}: '
                    f'Missing required property "{item_name}".'
                )
                raise ValueError(message)

        for item_name in items:
            if item_name not in self.recognized_constant_items:
                message = (
                    f'Error while parsing constant "{name}" in {self._source_definition_file}: '
                    f'Got unknown property "{item_name}".'
                )
                raise ValueError(message)

        value = items["value"]
        description = items.get("description", "")
        data_type_str = items.get("data_type")

        if data_type_str is not None:
            # A "data_type" re-interprets a string value, so it makes no sense for other types.
            if not isinstance(value, str):
                raise ValueError(
                    f'Error while parsing constant "{name}" in '
                    f"{self._source_definition_file}: "
                    'May not set "data_type" for non-string constant.'
                )

            if data_type_str == "unsigned":
                value = UnsignedVector(value)
            else:
                raise ValueError(
                    f'Error while parsing constant "{name}" in '
                    f"{self._source_definition_file}: "
                    f'Invalid data type "{data_type_str}".'
                )

        self._register_list.add_constant(name=name, value=value, description=description)

    def _parse_plain_register(self, name: str, items: dict[str, Any]) -> None:
        """
        Parse a register that is not part of a register array.
        Either updates an existing default register or appends a new register, and then
        parses the fields of the register.
        """
        description = items.get("description", "")

        if name in self._default_register_names:
            # Default registers can be "updated" in the sense that the user can set a custom
            # 'description' and add whatever fields they want in the current register list.
            # They may not, however, change the 'mode' which is part of the default definition.
            if "mode" in items:
                message = (
                    f'Error while parsing register "{name}" in {self._source_definition_file}: '
                    'A "mode" may not be specified for a default register.'
                )
                raise ValueError(message)

            register = self._register_list.get_register(register_name=name)
            register.description = description

        else:
            # If it is a new register however, the 'mode' has to be specified.
            if "mode" not in items:
                message = (
                    f'Error while parsing register "{name}" in {self._source_definition_file}: '
                    f'Missing required property "mode".'
                )
                raise ValueError(message)

            mode = self._get_mode(mode_name=items["mode"], register_name=name)

            register = self._register_list.append_register(
                name=name, mode=mode, description=description
            )

        self._parse_register_fields(register=register, register_items=items, register_array_note="")

    def _get_mode(self, mode_name: str, register_name: str) -> RegisterMode:
        """
        Look up the register mode object for the given mode name.
        Will raise ``ValueError`` if the name is not a key in ``REGISTER_MODES``.
        """
        if mode_name in REGISTER_MODES:
            return REGISTER_MODES[mode_name]

        valid_modes_str = ", ".join(f'"{mode_key}"' for mode_key in REGISTER_MODES)
        message = (
            f'Error while parsing register "{register_name}" in {self._source_definition_file}: '
            f'Got unknown mode "{mode_name}". Expected one of {valid_modes_str}.'
        )
        raise ValueError(message)

    def _parse_register_fields(
        self,
        register_items: dict[str, Any],
        register: Register,
        register_array_note: str,
    ) -> None:
        """
        Add all fields found in ``register_items`` to the ``register``.

        Arguments:
            register_items: All the items of the register, including the default items
                (which are skipped here).
            register: The register object that fields shall be appended to.
            register_array_note: Text inserted in error messages to tell the user which
                register array the register belongs to. Empty string for plain registers.
        """
        # Build the dispatch table once, not once per field.
        parser_methods = {
            "bit": self._parse_bit,
            "bit_vector": self._parse_bit_vector,
            "enumeration": self._parse_enumeration,
            "integer": self._parse_integer,
        }

        # Add any fields that are specified.
        for item_name, item_value in register_items.items():
            # Skip default items so we only get the fields.
            if item_name in self.default_register_items:
                continue

            if not isinstance(item_value, dict):
                message = (
                    f'Error while parsing register "{register.name}"{register_array_note} '
                    f"in {self._source_definition_file}: "
                    f'Got unknown property "{item_name}".'
                )
                # Seems to the linter like a type error, but it is actually the user specifying
                # a property/value that they shouldn't.
                # Corresponds better to a 'ValueError' than a 'TypeError'.
                raise ValueError(message)  # noqa: TRY004

            if "type" not in item_value:
                message = (
                    f'Error while parsing field "{item_name}" in register '
                    f'"{register.name}"{register_array_note} in {self._source_definition_file}: '
                    'Missing required property "type".'
                )
                raise ValueError(message)

            field_type = item_value["type"]

            if field_type not in parser_methods:
                valid_types_str = ", ".join(f'"{parser_key}"' for parser_key in parser_methods)
                message = (
                    f'Error while parsing field "{item_name}" in register '
                    f'"{register.name}"{register_array_note} in {self._source_definition_file}: '
                    f'Unknown field type "{field_type}". Expected one of {valid_types_str}.'
                )
                raise ValueError(message)

            parser_methods[field_type](
                register=register, field_name=item_name, field_items=item_value
            )

    def _parse_register_array(self, name: str, items: dict[str, Any]) -> None:
        """
        Parse a register array, including all the registers within it and their fields.
        Will raise ``ValueError`` if the array is malformed or contains no register.
        """
        for required_property in self.required_register_array_items:
            if required_property not in items:
                message = (
                    f'Error while parsing register array "{name}" in '
                    f"{self._source_definition_file}: "
                    f'Missing required property "{required_property}".'
                )
                raise ValueError(message)

        register_array_length = items["array_length"]
        register_array_description = items.get("description", "")
        register_array = self._register_list.append_register_array(
            name=name, length=register_array_length, description=register_array_description
        )

        # Add all registers that are specified.
        found_at_least_one_register = False
        for item_name, item_value in items.items():
            # Skip default items so we only get the registers.
            if item_name in self.default_register_array_items:
                continue

            found_at_least_one_register = True

            if not isinstance(item_value, dict):
                message = (
                    f'Error while parsing register array "{name}" in '
                    f"{self._source_definition_file}: "
                    f'Got unknown property "{item_name}".'
                )
                # Seems to the linter like a type error, but it is actually the user specifying
                # a property/value that they shouldn't.
                # Corresponds better to a 'ValueError' than a 'TypeError'.
                raise ValueError(message)  # noqa: TRY004

            item_type = item_value.get("type", "register")
            if item_type != "register":
                message = (
                    f'Error while parsing register "{item_name}" within array "{name}" in '
                    f"{self._source_definition_file}: "
                    f'Got unknown type "{item_type}". Expected "register".'
                )
                raise ValueError(message)

            # A 'mode' is semi-required for plain registers, but always required for
            # array registers.
            if "mode" not in item_value:
                raise ValueError(
                    f'Error while parsing register "{item_name}" within array "{name}" in '
                    f"{self._source_definition_file}: "
                    f'Missing required property "mode".'
                )
            register_mode = self._get_mode(mode_name=item_value["mode"], register_name=item_name)

            register_description = item_value.get("description", "")

            register = register_array.append_register(
                name=item_name, mode=register_mode, description=register_description
            )

            self._parse_register_fields(
                register_items=item_value,
                register=register,
                register_array_note=f' within array "{name}"',
            )

        if not found_at_least_one_register:
            message = (
                f'Error while parsing register array "{name}" in {self._source_definition_file}: '
                "Array must contain at least one register."
            )
            raise ValueError(message)

    def _check_field_items(
        self,
        register_name: str,
        field_name: str,
        field_items: dict[str, Any],
        recognized_items: set[str],
        required_items: list[str],
    ) -> None:
        """
        Will raise exception if anything is wrong.

        Checks that all ``required_items`` are present in ``field_items``, and that
        ``field_items`` contains nothing outside of ``recognized_items``.
        """
        for item_name in required_items:
            if item_name not in field_items:
                message = (
                    f'Error while parsing field "{field_name}" in register "{register_name}" in '
                    f"{self._source_definition_file}: "
                    f'Missing required property "{item_name}".'
                )
                raise ValueError(message)

        for item_name in field_items:
            if item_name not in recognized_items:
                message = (
                    f'Error while parsing field "{field_name}" in register '
                    f'"{register_name}" in {self._source_definition_file}: '
                    f'Unknown property "{item_name}".'
                )
                raise ValueError(message)

    def _parse_bit(self, register: Register, field_name: str, field_items: dict[str, Any]) -> None:
        """
        Check the properties of a "bit" field and append it to the register.
        Default value, unless specified, is "0".
        """
        self._check_field_items(
            register_name=register.name,
            field_name=field_name,
            field_items=field_items,
            recognized_items=self.recognized_bit_items,
            required_items=self.required_bit_items,
        )

        description = field_items.get("description", "")
        default_value = field_items.get("default_value", "0")

        register.append_bit(name=field_name, description=description, default_value=default_value)

    def _parse_bit_vector(
        self, register: Register, field_name: str, field_items: dict[str, Any]
    ) -> None:
        """
        Check the properties of a "bit_vector" field and append it to the register.
        Default value, unless specified, is 0.
        """
        self._check_field_items(
            register_name=register.name,
            field_name=field_name,
            field_items=field_items,
            recognized_items=self.recognized_bit_vector_items,
            required_items=self.required_bit_vector_items,
        )

        width = field_items["width"]

        description = field_items.get("description", "")
        default_value = field_items.get("default_value", 0)

        register.append_bit_vector(
            name=field_name, description=description, width=width, default_value=default_value
        )

    def _parse_enumeration(
        self, register: Register, field_name: str, field_items: dict[str, Any]
    ) -> None:
        """
        Check the properties of an "enumeration" field and append it to the register.
        Default value, unless specified, is the first declared element.
        Will raise ``ValueError`` if the "element" property is empty.
        """
        self._check_field_items(
            register_name=register.name,
            field_name=field_name,
            field_items=field_items,
            recognized_items=self.recognized_enumeration_items,
            required_items=self.required_enumeration_items,
        )

        description = field_items.get("description", "")
        # The check above guarantees that the "element" key is present.
        elements: dict[str, str] = field_items["element"]

        # The key being present does not mean that it contains anything, however.
        # The number of elements is checked also in the Enumeration class, which is needed if the
        # user is working directly with the Python API.
        # That is where we usually sanity check, to avoid duplication.
        # However, this particular check is needed here also since the logic for default
        # value below does not work if there are no elements (it would leak a 'StopIteration').
        if not elements:
            message = (
                f'Error while parsing field "{field_name}" in register "{register.name}" in '
                f"{self._source_definition_file}: "
                "Enumeration must have at least one element."
            )
            raise ValueError(message)

        if "default_value" in field_items:
            default_value = field_items["default_value"]
        else:
            # The default "default value" is the first declared enumeration element.
            # Note that this works because dictionaries in Python are guaranteed ordered since
            # Python 3.7.
            default_value = next(iter(elements))

        register.append_enumeration(
            name=field_name,
            description=description,
            elements=elements,
            default_value=default_value,
        )

    def _parse_integer(
        self, register: Register, field_name: str, field_items: dict[str, Any]
    ) -> None:
        """
        Check the properties of an "integer" field and append it to the register.
        Minimum value, unless specified, is 0.
        Default value, unless specified, is the minimum value.
        """
        self._check_field_items(
            register_name=register.name,
            field_name=field_name,
            field_items=field_items,
            recognized_items=self.recognized_integer_items,
            required_items=self.required_integer_items,
        )

        max_value = field_items["max_value"]

        description = field_items.get("description", "")
        min_value = field_items.get("min_value", 0)
        default_value = field_items.get("default_value", min_value)

        register.append_integer(
            name=field_name,
            description=description,
            min_value=min_value,
            max_value=max_value,
            default_value=default_value,
        )

538 

539 

def _convert_to_new_format(  # noqa: C901
    old_data: dict[str, Any],
) -> dict[str, Any]:
    """
    Translate register data from the pre-6.0.0 layout to the current layout.
    This is a semi-trash function that will be removed in the future.

    In the old layout, items are grouped under the top-level keys "register",
    "register_array" and "constant", and fields are grouped per field type within each register.
    In the new layout, everything is flat and identified by a "type" property instead.

    Arguments:
        old_data: Register data in the old layout.

    Return:
        The same information, arranged in the new layout.
    """
    field_type_names = ("bit", "bit_vector", "enumeration", "integer")
    converted: dict[str, Any] = {}

    def _store(name: str, items: dict[str, Any]) -> None:
        # Names share one flat namespace in the new layout, so clashes must be caught.
        if name in converted:
            raise ValueError(f"Duplicate item {name}")

        converted[name] = items

    def _flatten_register(register_items: dict[str, Any]) -> dict[str, Any]:
        # Keep plain register properties, and lift each field out of its per-type group.
        flat: dict[str, Any] = {}

        for key, value in register_items.items():
            if key in RegisterParser.default_register_items:
                flat[key] = value
                continue

            if key not in field_type_names:
                raise ValueError(
                    f"Unknown item {key}. Looks like an error in the user data file."
                )

            for field_name, field_items in value.items():
                field: dict[str, Any] = {"type": key}
                field.update(field_items.items())
                flat[field_name] = field

        return flat

    for register_name, register_items in old_data.get("register", {}).items():
        _store(name=register_name, items=_flatten_register(register_items))

    for array_name, array_items in old_data.get("register_array", {}).items():
        array_dict: dict[str, Any] = {"type": "register_array"}

        for key, value in array_items.items():
            if key in RegisterParser.default_register_array_items:
                array_dict[key] = value
            elif key == "register":
                for register_name, register_items in value.items():
                    array_dict[register_name] = _flatten_register(register_items)
            else:
                raise ValueError(
                    f"Unknown item {key}. Looks like an error in the user data file."
                )

        _store(name=array_name, items=array_dict)

    for constant_name, constant_items in old_data.get("constant", {}).items():
        constant: dict[str, Any] = {"type": "constant"}
        constant.update(constant_items.items())
        _store(name=constant_name, items=constant)

    return converted

612 

613 

def _save_to_new_format(old_data: dict[str, Any], output_file: Path) -> None:
    """
    Convert the old-format data to the new format and save it to file.

    The serialization format is chosen from the suffix of ``output_file``.
    Declaration order of the items is preserved in the written file for all formats.

    Arguments:
        old_data: Register data in the pre-6.0.0 format.
        output_file: The file that will be written.
            Suffix must be ".toml", ".json", ".yaml" or ".yml".

    Raises:
        ValueError: If the suffix of ``output_file`` is not one of the supported formats.
    """
    new_data = _convert_to_new_format(old_data=old_data)
    suffix = output_file.suffix

    if suffix == ".toml":
        with output_file.open("wb") as file_handle:
            tomli_w.dump(new_data, file_handle, multiline_strings=True)

        return

    if suffix == ".json":
        with output_file.open("w", encoding=DEFAULT_FILE_ENCODING) as file_handle:
            json.dump(new_data, file_handle, indent=4)

        return

    if suffix in (".yaml", ".yml"):
        with output_file.open("w", encoding=DEFAULT_FILE_ENCODING) as file_handle:
            # PyYAML sorts mapping keys alphabetically unless told otherwise.
            # That would reorder the registers and fields compared to the user's original file.
            yaml.dump(new_data, file_handle, sort_keys=False)

        return

    raise ValueError(f"Unknown file format {output_file}")