tools: add more entities and better error handling to parse_companies
Add the remaining lowercase acute-accented vowel HTML entities to
parse_companies.pl. On an unknown entity, die with an error message on
STDERR so the maintainer can more clearly understand the failure.
diff --git a/tools/parse_companies.pl b/tools/parse_companies.pl
index 99d3f31..d5b2815 100755
--- a/tools/parse_companies.pl
+++ b/tools/parse_companies.pl
@@ -8,7 +8,11 @@
my %known_entities = (
'nbsp' => ' ',
+ 'aacute' => 'á',
'eacute' => 'é',
+ 'iacute' => 'í',
+ 'oacute' => 'ó',
+ 'uacute' => 'ú',
'auml' => 'ä',
'uuml' => 'ü',
'Uuml' => 'Ü',
@@ -23,8 +27,7 @@
}
foreach my $entity (map { lc $_ } $name =~ /&([^;]+);/g) {
if ($entity ne 'amp') {
- print "Unable to convert &$entity;, giving up\n";
- exit 1;
+ die "\nparse_companies.pl: Unable to convert &$entity; giving up\n";
}
}
$name =~ s/&amp;/&/ig;