| # $Id: Converter.pm 6991 2016-02-06 12:16:13Z gavin $ |
| # Converter.pm: Common code for Converters. |
| # |
| # Copyright 2011, 2012, 2013, 2014, 2015, 2016 Free Software Foundation, Inc. |
| # |
| # This program is free software; you can redistribute it and/or modify |
| # it under the terms of the GNU General Public License as published by |
| # the Free Software Foundation; either version 3 of the License, |
| # or (at your option) any later version. |
| # |
| # This program is distributed in the hope that it will be useful, |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| # GNU General Public License for more details. |
| # |
| # You should have received a copy of the GNU General Public License |
| # along with this program. If not, see <http://www.gnu.org/licenses/>. |
| # |
| # Original author: Patrice Dumas <pertusus@free.fr> |
| |
| package Texinfo::Convert::Converter; |
| |
| use 5.00405; |
| use strict; |
| |
| # for fileparse |
| use File::Basename; |
| # for file names portability |
| use File::Spec; |
| use Encode; |
| |
| use Texinfo::Report; |
| use Texinfo::Common; |
| use Texinfo::Convert::Text; |
| use Texinfo::Convert::Texinfo; |
| use Texinfo::Structuring; |
| |
| use Carp qw(cluck); |
| |
| require Exporter; |
| use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); |
| @ISA = qw(Exporter Texinfo::Report); |
| |
| # Items to export into callers namespace by default. Note: do not export |
| # names by default without a very good reason. Use EXPORT_OK instead. |
| # Do not simply export all your public functions/methods/constants. |
| |
| # This allows declaration use Texinfo::Convert::Converter ':all'; |
| # If you do not need this, moving things directly into @EXPORT or @EXPORT_OK |
| # will save memory. |
| %EXPORT_TAGS = ( 'all' => [ qw( |
| xml_protect_text |
| xml_comment |
| xml_accent |
| xml_accents |
| ) ] ); |
| |
| @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); |
| |
| @EXPORT = qw( |
| ); |
| |
| $VERSION = '6.1'; |
| |
| my %defaults = ( |
| 'ENABLE_ENCODING' => 1, |
| 'OUTFILE' => undef, |
| 'SUBDIR' => undef, |
| 'documentlanguage' => undef, |
| ); |
| |
| # defaults for all converters. Maybe more could be added, especially what |
| # can be set with --set and should be the same for all the formats. |
| our %all_converters_defaults = ( |
| 'htmlxref_files' => undef, |
| 'output_format' => undef, |
| 'SPLIT_SIZE' => 300000, |
| 'paragraphindent' => 3, |
| 'fillcolumn' => 72, |
| 'expanded_formats' => undef, |
| 'include_directories' => undef, |
| 'IMAGE_LINK_PREFIX' => undef, |
| 'NUMBER_SECTIONS' => 1, |
| 'NUMBER_FOOTNOTES' => 1, |
| 'frenchspacing' => 'off', |
| 'paragraphindent' => 3, |
| 'firstparagraphindent' => 'none', |
| 'allowcodebreaks' => 'true', |
| 'footnotestyle' => 'end', |
| 'deftypefnnewline' => 'off', |
| 'BASEFILENAME_LENGTH' => 255 - 10, |
| # This is not used much as converters do their best to give a sane output |
| 'FIX_TEXINFO' => 1, |
| # This is the default, mainly for tests; the caller should set them. These |
| # values are what is used in tests of the Converters. |
| 'PACKAGE_AND_VERSION' => 'texinfo', |
| 'PACKAGE_VERSION' => '', |
| 'PACKAGE_URL' => 'http://www.gnu.org/software/texinfo/', |
| 'PROGRAM' => '', |
| |
| 'DEBUG' => 0, |
| 'TEST' => 0, |
| 'translated_commands' => {'error' => 'error@arrow{}',}, |
| 'TEXINFO_COLUMN_FOR_DESCRIPTION' => 32, # same as emacs |
| ); |
| |
| # For translation of in document string. |
| if (0) { |
| my $self; |
| $self->gdt('error@arrow{}'); |
| } |
| |
| sub converter_defaults($$) |
| { |
| return %defaults; |
| } |
| |
| sub converter_initialize($) |
| { |
| } |
| |
| sub converter_global_commands($) |
| { |
| return ('documentlanguage', 'documentencoding'); |
| } |
| |
| sub output_internal_links($) |
| { |
| my $self = shift; |
| return undef; |
| } |
| |
| sub _informative_command_value($$) |
| { |
| my $self = shift; |
| my $root = shift; |
| |
| my $cmdname = $root->{'cmdname'}; |
| |
| if ($Texinfo::Common::misc_commands{$cmdname} eq 'skipline') { |
| return 1; |
| } elsif (exists($root->{'extra'}->{'text_arg'})) { |
| return $root->{'extra'}->{'text_arg'}; |
| } elsif ($root->{'extra'} and $root->{'extra'}->{'misc_args'} |
| and exists($root->{'extra'}->{'misc_args'}->[0])) { |
| return $root->{'extra'}->{'misc_args'}->[0]; |
| } |
| return undef; |
| } |
| |
| # FIXME documentencoding handling is not reverted by resetting |
| # a value with set_conf, so _unset_global_multiple_commands won't |
| # reverse what _set_global_multiple_commands did through |
| # _informative_command. |
| sub _informative_command($$) |
| { |
| my $self = shift; |
| my $root = shift; |
| |
| my $cmdname = $root->{'cmdname'}; |
| $cmdname = 'shortcontents' if ($cmdname eq 'summarycontents'); |
| |
| return if ($self->{'set'}->{$cmdname}); |
| |
| my $value = $self->_informative_command_value($root); |
| if (defined($value)) { |
| $self->set_conf($cmdname, $value); |
| } |
| } |
| |
| sub register_close_file($$) |
| { |
| my $self = shift; |
| my $filename = shift; |
| if ($self->{'unclosed_files'}->{$filename}) { |
| delete $self->{'unclosed_files'}->{$filename}; |
| } else { |
| cluck "$filename not opened\n"; |
| } |
| } |
| |
| sub converter(;$) |
| { |
| my $class = shift; |
| my $converter = { 'set' => {} }; |
| |
| my $conf; |
| my $name = 'converter'; |
| |
| if (ref($class) eq 'HASH') { |
| $conf = $class; |
| bless $converter; |
| } elsif (defined($class)) { |
| bless $converter, $class; |
| $name = ref($converter); |
| $conf = shift; |
| } else { |
| bless $converter; |
| $conf = shift; |
| $name = ref($converter); |
| } |
| my %defaults = $converter->converter_defaults($conf); |
| foreach my $key (keys(%all_converters_defaults)) { |
| $defaults{$key} = $all_converters_defaults{$key} |
| if (!exists($defaults{$key})); |
| } |
| foreach my $key (keys(%defaults)) { |
| if (Texinfo::Common::valid_option($key)) { |
| $converter->{'conf'}->{$key} = $defaults{$key}; |
| } else { |
| $converter->{$key} = $defaults{$key}; |
| } |
| } |
| $converter->{'conf_default'} = \%defaults; |
| if (defined($conf)) { |
| if ($conf->{'parser'}) { |
| $converter->{'parser'} = $conf->{'parser'}; |
| $converter->{'extra'} |
| = $converter->{'parser'}->global_commands_information(); |
| $converter->{'info'} = $converter->{'parser'}->global_informations(); |
| if ($converter->{'info'} |
| and $converter->{'info'}->{'input_perl_encoding'} |
| and !defined($conf->{'INPUT_PERL_ENCODING'})) { |
| $conf->{'INPUT_PERL_ENCODING'} |
| = $converter->{'info'}->{'input_perl_encoding'}; |
| } |
| if ($converter->{'info'} |
| and $converter->{'info'}->{'input_encoding_name'} |
| and !defined($conf->{'INPUT_ENCODING_NAME'})) { |
| $conf->{'INPUT_ENCODING_NAME'} |
| = $converter->{'info'}->{'input_encoding_name'}; |
| } |
| my $floats = $converter->{'parser'}->floats_information(); |
| my $labels = $converter->{'parser'}->labels_information(); |
| $converter->{'structuring'} = $converter->{'parser'}->{'structuring'}; |
| |
| $converter->{'floats'} = $floats if ($floats); |
| $converter->{'labels'} = $labels if ($labels); |
| foreach my $global_command ($converter->converter_global_commands()) { |
| if (defined($converter->{'extra'}->{$global_command})) { |
| my $root = $converter->{'extra'}->{$global_command}; |
| # always set unique commands |
| if (ref($root) ne 'ARRAY') { |
| $converter->_informative_command($root); |
| } |
| } |
| } |
| $converter->set_conf('setcontentsaftertitlepage', 1) |
| if ($converter->get_conf('contents') |
| and $converter->{'extra'}->{'setcontentsaftertitlepage'} |
| and $converter->{'structuring'} |
| and $converter->{'structuring'}->{'sectioning_root'}); |
| $converter->set_conf('setshortcontentsaftertitlepage', 1) |
| if ($converter->get_conf('shortcontents') |
| and $converter->{'extra'}->{'setshortcontentsaftertitlepage'} |
| and $converter->{'structuring'} |
| and $converter->{'structuring'}->{'sectioning_root'}); |
| $converter->{'gettext'} = $converter->{'parser'}->{'gettext'}; |
| $converter->{'pgettext'} = $converter->{'parser'}->{'pgettext'}; |
| delete $conf->{'parser'}; |
| } |
| foreach my $key (keys(%$conf)) { |
| if (Texinfo::Common::valid_option($key)) { |
| $converter->{'conf'}->{$key} = $conf->{$key}; |
| } elsif (!exists($defaults{$key})) { |
| warn "$key not a possible configuration in $name\n"; |
| } else { |
| $converter->{$key} = $conf->{$key}; |
| } |
| $converter->{'set'}->{$key} = 1; |
| } |
| } |
| $converter->set_conf('OUTPUT_ENCODING_NAME', |
| $converter->get_conf('INPUT_ENCODING_NAME')) |
| if ($converter->get_conf('INPUT_ENCODING_NAME')); |
| if (!$converter->get_conf('OUTPUT_PERL_ENCODING') |
| and $converter->get_conf('OUTPUT_ENCODING_NAME')) { |
| my $perl_encoding |
| = Encode::resolve_alias($converter->get_conf('OUTPUT_ENCODING_NAME')); |
| if ($perl_encoding) { |
| $converter->set_conf('OUTPUT_PERL_ENCODING', $perl_encoding); |
| } |
| } |
| if (!defined($converter->{'expanded_formats'})) { |
| if ($converter->{'parser'}) { |
| $converter->{'expanded_formats'} = $converter->{'parser'}->{'expanded_formats'}; |
| } else { |
| $converter->{'expanded_formats'} = []; |
| } |
| } |
| if (!defined($converter->{'include_directories'})) { |
| if ($converter->{'parser'}) { |
| $converter->{'include_directories'} = $converter->{'parser'}->{'include_directories'}; |
| } else { |
| $converter->{'include_directories'} = [ '.' ]; |
| } |
| } |
| |
| # turn the array to a hash for speed. Not sure it really matters for such |
| # a small array. |
| foreach my $expanded_format(@{$converter->{'expanded_formats'}}) { |
| $converter->{'expanded_formats_hash'}->{$expanded_format} = 1; |
| } |
| |
| $converter->Texinfo::Report::new(); |
| |
| $converter->converter_initialize(); |
| |
| return $converter; |
| } |
| |
| sub converter_unclosed_files($) |
| { |
| my $self = shift; |
| return $self->{'unclosed_files'}; |
| } |
| |
| sub converter_opened_files($) |
| { |
| my $self = shift; |
| if (defined($self->{'opened_files'})) { |
| return @{$self->{'opened_files'}}; |
| } else { |
| return (); |
| } |
| } |
| |
| sub _set_global_multiple_commands($;$) |
| { |
| my $self = shift; |
| my $multiple_commands_index = shift; |
| $multiple_commands_index = 0 if (!defined($multiple_commands_index)); |
| |
| foreach my $global_command ($self->converter_global_commands()) { |
| if (defined($self->{'extra'}->{$global_command}) |
| and ref($self->{'extra'}->{$global_command}) eq 'ARRAY') { |
| my $root = $self->{'extra'}->{$global_command}->[$multiple_commands_index]; |
| if ($self->get_conf('DEBUG')) { |
| print STDERR "SET_global_multiple_commands($multiple_commands_index) $global_command\n"; |
| } |
| $self->_informative_command($root); |
| } |
| } |
| } |
| |
| # Notice that set_conf is used, which means that it is not possible to |
| # customize what is done for those commands. For documentencoding, for |
| # example the values are not reset correctly, see the FIXME above. |
| sub _unset_global_multiple_commands($) |
| { |
| my $self = shift; |
| |
| foreach my $global_command ($self->converter_global_commands()) { |
| if (defined($self->{'extra'}->{$global_command}) |
| and ref($self->{'extra'}->{$global_command}) eq 'ARRAY') { |
| if (exists($self->{'conf_default'}->{$global_command})) { |
| if ($self->get_conf('DEBUG')) { |
| my $default = 'UNDEF'; |
| $default = $self->{'conf_default'}->{$global_command} |
| if (defined($self->{'conf_default'}->{$global_command})); |
| my $set = 0; |
| $set = 1 if ($self->{'set'}->{$global_command}); |
| print STDERR "UNSET_global_multiple_commands $global_command ($set): $default\n"; |
| } |
| if (Texinfo::Common::valid_option($global_command)) { |
| $self->set_conf($global_command, $self->{'conf_default'}->{$global_command}); |
| } else { |
| $self->{$global_command} = $self->{'conf_default'}->{$global_command}; |
| } |
| } |
| } |
| } |
| } |
| |
| sub get_conf($$) |
| { |
| my $self = shift; |
| my $conf = shift; |
| if (!Texinfo::Common::valid_option($conf)) { |
| warn "BUG: unknown option $conf\n"; |
| return undef; |
| } |
| return $self->{'conf'}->{$conf}; |
| } |
| |
| sub set_conf($$$) |
| { |
| my $self = shift; |
| my $conf = shift; |
| my $value = shift; |
| if (!Texinfo::Common::valid_option($conf)) { |
| warn "BUG: unknown option $conf\n"; |
| return undef; |
| } elsif (Texinfo::Common::obsolete_option($conf)) { |
| warn(sprintf(main::__("Obsolete variable %s\n"), $conf)); |
| } |
| if ($self->{'set'}->{$conf}) { |
| return 0; |
| } else { |
| $self->{'conf'}->{$conf} = $value; |
| return 1; |
| } |
| } |
| |
| sub force_conf($$$) |
| { |
| my $self = shift; |
| my $conf = shift; |
| my $value = shift; |
| if (!Texinfo::Common::valid_option($conf)) { |
| warn "BUG: unknown option $conf\n"; |
| return undef; |
| } elsif (Texinfo::Common::obsolete_option($conf)) { |
| warn(sprintf(main::__("Obsolete variable %s\n"), $conf)); |
| } |
| $self->{'conf'}->{$conf} = $value; |
| return 1; |
| } |
| |
| my $STDIN_DOCU_NAME = 'stdin'; |
| |
| sub _set_outfile($$$) |
| { |
| my $self = shift; |
| |
| # determine input file base name |
| my $input_basename; |
| if (defined($self->{'info'}->{'input_file_name'})) { |
| my ($directories, $suffix); |
| ($input_basename, $directories, $suffix) |
| = fileparse($self->{'info'}->{'input_file_name'}); |
| } else { |
| # This could happen if called on a piece of texinfo |
| $input_basename = ''; |
| } |
| $self->{'input_basename'} = $input_basename; |
| if ($input_basename eq '-') { |
| $input_basename = $STDIN_DOCU_NAME |
| } else { |
| $input_basename =~ s/\.te?x(i|info)?$//; |
| } |
| $self->{'input_basename_name'} = $input_basename; |
| |
| my $setfilename; |
| if (defined($self->get_conf('setfilename'))) { |
| $setfilename = $self->get_conf('setfilename'); |
| } elsif ($self->{'extra'} and $self->{'extra'}->{'setfilename'} |
| and $self->{'extra'}->{'setfilename'}->{'extra'} |
| and defined($self->{'extra'}->{'setfilename'}->{'extra'}->{'text_arg'})) { |
| $setfilename = $self->{'extra'}->{'setfilename'}->{'extra'}->{'text_arg'} |
| } |
| |
| # PREFIX overrides both setfilename and the input file base name |
| if (defined($self->get_conf('PREFIX'))) { |
| $setfilename = undef; |
| $input_basename = $self->get_conf('PREFIX'); |
| } |
| |
| # the name of the document, which is more or less the basename, without |
| # extension |
| my $document_name; |
| # determine output file and output file name |
| my $outfile; |
| if (!defined($self->get_conf('OUTFILE'))) { |
| if (defined($setfilename) and !$self->get_conf('NO_USE_SETFILENAME')) { |
| $outfile = $setfilename; |
| $document_name = $setfilename; |
| $document_name =~ s/\.[^\.]*$//; |
| if (!$self->get_conf('USE_SETFILENAME_EXTENSION')) { |
| $outfile =~ s/\.[^\.]*$//; |
| $outfile .= '.'.$self->get_conf('EXTENSION') |
| if (defined($self->get_conf('EXTENSION')) |
| and $self->get_conf('EXTENSION') ne ''); |
| } |
| } elsif ($input_basename ne '') { |
| $outfile = $input_basename; |
| $document_name = $input_basename; |
| $outfile .= '.'.$self->get_conf('EXTENSION') |
| if (defined($self->get_conf('EXTENSION')) |
| and $self->get_conf('EXTENSION') ne ''); |
| } else { |
| $outfile = ''; |
| $document_name = $outfile; |
| } |
| if (defined($self->get_conf('SUBDIR')) and $outfile ne '') { |
| my $dir = File::Spec->canonpath($self->get_conf('SUBDIR')); |
| $outfile = File::Spec->catfile($dir, $outfile); |
| } |
| #$self->set_conf('OUTFILE', $outfile); |
| } else { |
| $document_name = $self->get_conf('OUTFILE'); |
| $document_name =~ s/\.[^\.]*$//; |
| $outfile = $self->get_conf('OUTFILE'); |
| } |
| |
| # the output file without directories part. |
| my $output_filename = $outfile; |
| # this is a case that should happen rarely: one wants to get |
| # the result in a string and there is a setfilename. |
| if ($outfile eq '' and defined($setfilename) |
| and !$self->get_conf('NO_USE_SETFILENAME')) { |
| $output_filename = $setfilename; |
| $document_name = $setfilename; |
| $document_name =~ s/\.[^\.]*$//; |
| } |
| my ($directories, $suffix); |
| # We may be handling setfilename there, so it is not obvious that we |
| # want to use fileparse and not consider unixish separators. However, |
| # if this is setfilename, it should be a simple file name, so it |
| # should hopefully be harmless to use fileparse |
| ($document_name, $directories, $suffix) = fileparse($document_name); |
| $self->{'document_name'} = $document_name; |
| ($output_filename, $directories, $suffix) = fileparse($output_filename); |
| $self->{'output_filename'} = $output_filename; |
| if ($self->get_conf('SPLIT')) { |
| if (defined($self->get_conf('OUTFILE'))) { |
| $self->{'destination_directory'} = $self->get_conf('OUTFILE'); |
| } elsif (defined($self->get_conf('SUBDIR'))) { |
| $self->{'destination_directory'} = $self->get_conf('SUBDIR'); |
| } else { |
| $self->{'destination_directory'} = $document_name; |
| } |
| } else { |
| my ($out_filename, $output_dir, $suffix) = fileparse($outfile); |
| if ($output_dir ne '') { |
| $self->{'destination_directory'} = $output_dir; |
| } |
| } |
| if (defined($self->{'destination_directory'}) |
| and $self->{'destination_directory'} ne '') { |
| $self->{'destination_directory'} |
| = File::Spec->canonpath($self->{'destination_directory'}); |
| } |
| $self->{'output_file'} = $outfile; |
| } |
| |
| sub _id_to_filename($$) |
| { |
| my $self = shift; |
| my $id = shift; |
| return substr($id, 0, $self->get_conf('BASEFILENAME_LENGTH')); |
| } |
| |
| sub _sectioning_command_normalized_filename($$) |
| { |
| my $self = shift; |
| my $command = shift; |
| my $no_unidecode; |
| |
| $no_unidecode = 1 if (defined($self->get_conf('USE_UNIDECODE')) |
| and !$self->get_conf('USE_UNIDECODE')); |
| |
| my $normalized_name = Texinfo::Convert::NodeNameNormalization::transliterate_texinfo( |
| {'contents' => $command->{'extra'}->{'misc_content'}}, |
| $no_unidecode); |
| |
| my $filename = $self->_id_to_filename($normalized_name); |
| $filename .= '.'.$self->get_conf('EXTENSION') |
| if (defined($self->get_conf('EXTENSION')) |
| and $self->get_conf('EXTENSION') ne ''); |
| return ($normalized_name, $filename); |
| } |
| |
| sub _node_filename($$) |
| { |
| my $self = shift; |
| my $node_info = shift; |
| |
| my $no_unidecode; |
| $no_unidecode = 1 if (defined($self->get_conf('USE_UNIDECODE')) |
| and !$self->get_conf('USE_UNIDECODE')); |
| |
| my $filename; |
| if (defined($node_info->{'normalized'})) { |
| if ($self->get_conf('TRANSLITERATE_FILE_NAMES')) { |
| $filename = Texinfo::Convert::NodeNameNormalization::transliterate_texinfo( |
| {'contents' => $node_info->{'node_content'}}, |
| $no_unidecode); |
| } else { |
| $filename = $node_info->{'normalized'}; |
| } |
| } else { |
| $filename = ''; |
| } |
| $filename = $self->_id_to_filename($filename); |
| return $filename; |
| } |
| |
| sub _set_element_file($$$) |
| { |
| my $self = shift; |
| my $element = shift; |
| my $filename = shift; |
| |
| if (!defined($filename)) { |
| cluck("_set_element_file: filename not defined\n"); |
| } |
| if ($self->get_conf('CASE_INSENSITIVE_FILENAMES')) { |
| if (exists($self->{'filenames'}->{lc($filename)})) { |
| if ($self->get_conf('DEBUG')) { |
| print STDERR "Reusing ".$self->{'filenames'}->{lc($filename)} |
| ." for $filename\n"; |
| } |
| $filename = $self->{'filenames'}->{lc($filename)}; |
| } else { |
| $self->{'filenames'}->{lc($filename)} = $filename; |
| } |
| } |
| $element->{'filename'} = $filename; |
| if (defined($self->{'destination_directory'}) |
| and $self->{'destination_directory'} ne '') { |
| $element->{'out_filename'} = |
| File::Spec->catfile($self->{'destination_directory'}, $filename); |
| } else { |
| $element->{'out_filename'} = $filename; |
| } |
| } |
| |
| sub _top_node_filename($) |
| { |
| my $self = shift; |
| |
| my $top_node_filename; |
| if (defined($self->get_conf('TOP_FILE')) |
| and $self->get_conf('TOP_FILE') ne '') { |
| $top_node_filename = $self->get_conf('TOP_FILE'); |
| } else { |
| if (defined($self->get_conf('TOP_NODE_FILE'))) { |
| $top_node_filename = $self->get_conf('TOP_NODE_FILE'); |
| } else { |
| # TOP_NODE_FILE is set in the default case. |
| # If not the manual name is used. |
| $top_node_filename = $self->{'document_name'}; |
| } |
| if (defined($top_node_filename)) { |
| my $top_node_extension; |
| if ($self->get_conf('NODE_FILENAMES')) { |
| $top_node_extension = $self->get_conf('NODE_FILE_EXTENSION'); |
| } else { |
| $top_node_extension = $self->get_conf('EXTENSION'); |
| } |
| $top_node_filename .= '.'.$top_node_extension |
| if (defined($top_node_extension) and $top_node_extension ne ''); |
| } |
| } |
| return $top_node_filename; |
| } |
| |
| sub _get_element($$) |
| { |
| my $self = shift; |
| my $command = shift; |
| my $find_container = shift; |
| |
| my $current = $command; |
| |
| while (1) { |
| if ($current->{'type'}) { |
| if ($current->{'type'} eq 'element') { |
| return $current; |
| } |
| } |
| if ($current->{'parent'}) { |
| $current = $current->{'parent'}; |
| } else { |
| return undef; |
| } |
| } |
| } |
| |
| sub _set_pages_files($$) |
| { |
| my $self = shift; |
| my $elements = shift; |
| |
| # Ensure that the document has pages |
| return undef if (!defined($elements) or !@$elements); |
| |
| my $extension = ''; |
| $extension = '.'.$self->get_conf('EXTENSION') |
| if (defined($self->get_conf('EXTENSION')) |
| and $self->get_conf('EXTENSION') ne ''); |
| |
| if (!$self->get_conf('SPLIT')) { |
| foreach my $element (@$elements) { |
| if (!defined($element->{'filename'})) { |
| $element->{'filename'} = $self->{'output_filename'}; |
| $element->{'out_filename'} = $self->{'output_file'}; |
| } |
| } |
| } else { |
| my $node_top; |
| #my $section_top; |
| $node_top = $self->{'labels'}->{'Top'} if ($self->{'labels'}); |
| #$section_top = $self->{'extra'}->{'top'} if ($self->{'extra'}); |
| |
| my $top_node_filename = $self->_top_node_filename(); |
| # first determine the top node file name. |
| if ($self->get_conf('NODE_FILENAMES') and $node_top |
| and defined($top_node_filename)) { |
| my ($node_top_element) = $self->_get_element($node_top); |
| die "BUG: No element for top node" if (!defined($node_top)); |
| $self->_set_element_file($node_top_element, $top_node_filename); |
| } |
| my $file_nr = 0; |
| my $previous_page; |
| foreach my $element(@$elements) { |
| # For Top node. |
| next if (defined($element->{'filename'})); |
| if (!$element->{'extra'}->{'first_in_page'}) { |
| cluck ("No first_in_page for $element\n"); |
| } |
| if (!defined($element->{'extra'}->{'first_in_page'}->{'filename'})) { |
| my $file_element = $element->{'extra'}->{'first_in_page'}; |
| if ($self->get_conf('NODE_FILENAMES')) { |
| foreach my $root_command (@{$file_element->{'contents'}}) { |
| if ($root_command->{'cmdname'} |
| and $root_command->{'cmdname'} eq 'node') { |
| my $node_filename; |
| # double node are not normalized, they are handled here |
| if (!defined($root_command->{'extra'}->{'normalized'}) |
| or !defined($self->{'labels'}->{$root_command->{'extra'}->{'normalized'}})) { |
| $node_filename = 'unknown_node'; |
| } else { |
| $node_filename = $self->_node_filename($root_command->{'extra'}); |
| } |
| $node_filename .= '.'.$self->get_conf('NODE_FILE_EXTENSION') |
| if (defined($self->get_conf('NODE_FILE_EXTENSION')) |
| and $self->get_conf('NODE_FILE_EXTENSION') ne ''); |
| $self->_set_element_file($file_element, $node_filename); |
| last; |
| } |
| } |
| if (!defined($file_element->{'filename'})) { |
| # use section to do the file name if there is no node |
| my $command = $self->element_command($file_element); |
| if ($command) { |
| if ($command->{'cmdname'} eq 'top' and !$node_top |
| and defined($top_node_filename)) { |
| $self->_set_element_file($file_element, $top_node_filename); |
| } else { |
| my ($normalized_name, $filename) |
| = $self->_sectioning_command_normalized_filename($command); |
| $self->_set_element_file($file_element, $filename) |
| } |
| } else { |
| # when everything else has failed |
| if ($file_nr == 0 and !$node_top |
| and defined($top_node_filename)) { |
| $self->_set_element_file($file_element, $top_node_filename); |
| } else { |
| my $filename = $self->{'document_name'} . "_$file_nr"; |
| $filename .= $extension; |
| $self->_set_element_file($element, $filename); |
| } |
| $file_nr++; |
| } |
| } |
| } else { |
| my $filename = $self->{'document_name'} . "_$file_nr"; |
| $filename .= '.'.$self->get_conf('EXTENSION') |
| if (defined($self->get_conf('EXTENSION')) |
| and $self->get_conf('EXTENSION') ne ''); |
| $self->_set_element_file($file_element, $filename); |
| $file_nr++; |
| } |
| } |
| $element->{'filename'} |
| = $element->{'extra'}->{'first_in_page'}->{'filename'}; |
| $element->{'out_filename'} |
| = $element->{'extra'}->{'first_in_page'}->{'out_filename'}; |
| } |
| } |
| |
| foreach my $element (@$elements) { |
| $self->{'file_counters'}->{$element->{'filename'}}++; |
| print STDERR "Page $element ".Texinfo::Structuring::_print_element_command_texi($element).": $element->{'filename'}($self->{'file_counters'}->{$element->{'filename'}})\n" |
| if ($self->get_conf('DEBUG')); |
| } |
| } |
| |
| sub output($$) |
| { |
| my $self = shift; |
| my $root = shift; |
| |
| my $elements; |
| |
| if (defined($self->get_conf('OUTFILE')) |
| and ($Texinfo::Common::null_device_file{$self->get_conf('OUTFILE')} |
| or $self->get_conf('OUTFILE') eq '-' |
| or $self->get_conf('OUTFILE') eq '')) { |
| if ($self->get_conf('SPLIT')) { |
| $self->document_warn(sprintf($self->__("%s: output incompatible with split"), |
| $self->get_conf('OUTFILE'))); |
| $self->force_conf('SPLIT', 0); |
| } |
| } |
| if ($self->get_conf('SPLIT')) { |
| $self->set_conf('NODE_FILES', 1); |
| } |
| if ($self->get_conf('NODE_FILES') |
| or ($self->get_conf('SPLIT') and $self->get_conf('SPLIT') eq 'node')) { |
| $self->set_conf('NODE_FILENAMES', 1); |
| } |
| if ($self->get_conf('NODE_FILENAMES') and defined($self->get_conf('EXTENSION'))) { |
| $self->set_conf('NODE_FILE_EXTENSION', $self->get_conf('EXTENSION')); |
| } |
| |
| $self->_set_outfile(); |
| return undef unless $self->_create_destination_directory(); |
| |
| # do that now to have it available for formatting |
| # NOTE this calls Convert::Converter::_informative_command on all the |
| # @informative_global commands. |
| # Thus sets among others language and encodings. |
| $self->_set_global_multiple_commands(-1); |
| |
| if ($self->get_conf('USE_NODES')) { |
| $elements = Texinfo::Structuring::split_by_node($root); |
| } else { |
| $elements = Texinfo::Structuring::split_by_section($root); |
| } |
| |
| Texinfo::Structuring::split_pages($elements, $self->get_conf('SPLIT')); |
| |
| # determine file names associated with the different pages |
| if ($self->{'output_file'} ne '') { |
| $self->_set_pages_files($elements); |
| } |
| |
| #print STDERR "$elements $elements->[0]->{'filename'}\n"; |
| |
| # Now do the output |
| my $fh; |
| my $output = ''; |
| if (!$elements or !defined($elements->[0]->{'filename'})) { |
| # no page |
| my $outfile; |
| if ($self->{'output_file'} ne '') { |
| if ($self->get_conf('SPLIT')) { |
| $outfile = $self->_top_node_filename(); |
| if (defined($self->{'destination_directory'}) |
| and $self->{'destination_directory'} ne '') { |
| $outfile = File::Spec->catfile($self->{'destination_directory'}, |
| $outfile); |
| } |
| } else { |
| $outfile = $self->{'output_file'}; |
| } |
| print STDERR "DO No pages, output in $outfile\n" |
| if ($self->get_conf('DEBUG')); |
| $fh = $self->Texinfo::Common::open_out($outfile); |
| if (!$fh) { |
| $self->document_error(sprintf($self->__("could not open %s for writing: %s"), |
| $outfile, $!)); |
| return undef; |
| } |
| } else { |
| print STDERR "DO No pages, string output\n" |
| if ($self->get_conf('DEBUG')); |
| } |
| |
| if ($elements and @$elements) { |
| foreach my $element (@$elements) { |
| my $element_text = $self->convert_tree($element); |
| $output .= $self->_output_text($element_text, $fh); |
| } |
| } else { |
| $output .= $self->_output_text($self->convert($root), $fh); |
| } |
| # NOTE do not close STDOUT now to avoid a perl warning. |
| if ($fh and $outfile ne '-') { |
| $self->register_close_file($outfile); |
| if (!close($fh)) { |
| $self->document_error(sprintf($self->__("error on closing %s: %s"), |
| $outfile, $!)); |
| } |
| } |
| return $output if ($self->{'output_file'} eq ''); |
| } else { |
| # output with pages |
| print STDERR "DO Elements with filenames\n" |
| if ($self->get_conf('DEBUG')); |
| my %files; |
| |
| foreach my $element (@$elements) { |
| my $file_fh; |
| # open the file and output the elements |
| if (!$files{$element->{'filename'}}->{'fh'}) { |
| $file_fh = $self->Texinfo::Common::open_out($element->{'out_filename'}); |
| if (!$file_fh) { |
| $self->document_error(sprintf($self->__("could not open %s for writing: %s"), |
| $element->{'out_filename'}, $!)); |
| return undef; |
| } |
| $files{$element->{'filename'}}->{'fh'} = $file_fh; |
| } else { |
| $file_fh = $files{$element->{'filename'}}->{'fh'}; |
| } |
| my $element_text = $self->convert_tree($element); |
| print $file_fh $element_text; |
| $self->{'file_counters'}->{$element->{'filename'}}--; |
| if ($self->{'file_counters'}->{$element->{'filename'}} == 0) { |
| # NOTE do not close STDOUT here to avoid a perl warning |
| if ($element->{'out_filename'} ne '-') { |
| $self->register_close_file($element->{'out_filename'}); |
| if (!close($file_fh)) { |
| $self->document_error(sprintf($self->__("error on closing %s: %s"), |
| $element->{'out_filename'}, $!)); |
| return undef; |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| sub _bug_message($$;$) |
| { |
| my $self = shift; |
| my $message = shift; |
| my $current = shift; |
| |
| my $line_message = ''; |
| my $current_element_message = ''; |
| if ($current) { |
| if ($current->{'line_nr'}) { |
| my $line_number = $current->{'line_nr'}; |
| my $file = $line_number->{'file_name'}; |
| $line_message |
| = "in: $line_number->{'file_name'}:$line_number->{'line_nr'}"; |
| if ($line_number->{'macro'} ne '') { |
| $line_message .= " (possibly involving $line_number->{'macro'})"; |
| } |
| $line_message .= "\n"; |
| } |
| if ($current) { |
| $current_element_message = "current: ". |
| Texinfo::Parser::_print_current($current); |
| } |
| } |
| my $additional_information = ''; |
| if ($line_message.$current_element_message ne '') { |
| $additional_information = "Additional informations:\n". |
| $line_message.$current_element_message; |
| } |
| warn "You found a bug: $message\n\n".$additional_information; |
| } |
| |
| # This is not used as code, but used to mark months as strings to be |
| # translated |
| if (0) { |
| my $self; |
| my @mark_month_for_translation = ( |
| $self->gdt('January'), |
| $self->gdt('February'), |
| $self->gdt('March'), |
| $self->gdt('April'), |
| $self->gdt('May'), |
| $self->gdt('June'), |
| $self->gdt('July'), |
| $self->gdt('August'), |
| $self->gdt('September'), |
| $self->gdt('October'), |
| $self->gdt('November'), |
| $self->gdt('December') |
| ); |
| } |
| |
| sub _create_destination_directory($) |
| { |
| my $self = shift; |
| if (defined($self->{'destination_directory'}) |
| and ! -d $self->{'destination_directory'}) { |
| if (!mkdir($self->{'destination_directory'}, oct(755))) { |
| if ($self->get_conf('SPLIT') |
| and $self->get_conf('EXTENSION') |
| and $self->get_conf('EXTENSION') ne '') { |
| my ($volume, $directories, $file) |
| = File::Spec->splitpath($self->{'destination_directory'}, 1); |
| my $new_directory = File::Spec->catpath($volume, |
| $directories . '.' . $self->get_conf('EXTENSION'), $file); |
| if (! -d $new_directory) { |
| if (!mkdir($new_directory, oct(755))) { |
| $self->document_error(sprintf($self->__( |
| "could not create directories `%s' or `%s': %s"), |
| $self->{'destination_directory'}, $new_directory, $!)); |
| return undef; |
| } |
| } |
| $self->{'destination_directory'} = $new_directory; |
| } else { |
| $self->document_error(sprintf($self->__( |
| "could not create directory `%s': %s"), |
| $self->{'destination_directory'}, $!)); |
| return undef; |
| } |
| } |
| } |
| return 1; |
| } |
| |
| sub _float_type_number($$) |
| { |
| my $self = shift; |
| my $float = shift; |
| |
| my $type; |
| if ($float->{'extra'}->{'type'} |
| and $float->{'extra'}->{'type'}->{'normalized'} ne '') { |
| $type = {'contents' => $float->{'extra'}->{'type'}->{'content'}}; |
| } |
| |
| my $tree; |
| if ($type) { |
| if (defined($float->{'number'})) { |
| $tree = $self->gdt("{float_type} {float_number}", |
| {'float_type' => $type, |
| 'float_number' => $float->{'number'}}); |
| } else { |
| $tree = $self->gdt("{float_type}", |
| {'float_type' => $type}); |
| } |
| } elsif (defined($float->{'number'})) { |
| $tree = $self->gdt("{float_number}", |
| {'float_number' => $float->{'number'}}); |
| } |
| return $tree; |
| } |
| |
| # This is used when the formatted text has no comment nor new line, but |
| # one want to add the comment or new line from the original arg |
| sub _end_line_or_comment($$) |
| { |
| my $self = shift; |
| my $contents_possible_comment = shift; |
| my $end_line; |
| if ($contents_possible_comment |
| and $contents_possible_comment->[-1]->{'cmdname'} |
| and ($contents_possible_comment->[-1]->{'cmdname'} eq 'c' |
| or $contents_possible_comment->[-1]->{'cmdname'} eq 'comment')) { |
| $end_line = $self->convert_tree($contents_possible_comment->[-1]); |
| } elsif ($contents_possible_comment |
| and $contents_possible_comment->[-1]->{'text'}) { |
| my $text = $contents_possible_comment->[-1]->{'text'}; |
| if (chomp($text)) { |
| $end_line = "\n"; |
| } else { |
| $end_line = ''; |
| } |
| } else { |
| $end_line = ''; |
| } |
| return $end_line; |
| } |
| |
| sub _tree_without_comment($) |
| { |
| my $contents_possible_comment = shift; |
| my $comment; |
| my $tree; |
| |
| if ($contents_possible_comment->{'contents'} |
| and $contents_possible_comment->{'contents'}->[-1]->{'cmdname'} |
| and ($contents_possible_comment->{'contents'}->[-1]->{'cmdname'} eq 'c' |
| or $contents_possible_comment->{'contents'}->[-1]->{'cmdname'} eq 'comment')) { |
| my @contents = @{$contents_possible_comment->{'contents'}}; |
| $comment = pop @contents; |
| $tree = {'contents' => \@contents}; |
| # FIXME why this selection, and not everything? |
| foreach my $key ('extra', 'type', 'cmdname', 'parent', 'line_nr') { |
| $tree->{$key} = $contents_possible_comment->{$key} |
| if (exists($contents_possible_comment->{$key})); |
| } |
| } else { |
| $tree = $contents_possible_comment; |
| } |
| return ($comment, $tree); |
| } |
| |
| sub _convert_argument_and_end_line($$) |
| { |
| my $self = shift; |
| my $root = shift; |
| my ($comment, $tree) |
| = _tree_without_comment($root); |
| my $converted = $self->convert_tree($tree); |
| my $end_line; |
| if ($comment) { |
| $end_line = $self->convert_tree($comment); |
| } else { |
| if (chomp($converted)) { |
| $end_line = "\n"; |
| } else { |
| $end_line = ""; |
| } |
| } |
| return ($converted, $end_line); |
| } |
| |
| sub _collect_leading_trailing_spaces_arg($$) |
| { |
| my $self = shift; |
| my $arg = shift; |
| #print STDERR "$arg->{'type'}: @{$arg->{'contents'}}\n"; |
| my @result; |
| if ($arg->{'contents'} and $arg->{'contents'}->[0] |
| and defined($arg->{'contents'}->[0]->{'text'}) |
| and $arg->{'contents'}->[0]->{'text'} !~ /\S/ |
| and defined($arg->{'contents'}->[0]->{'type'})) { |
| #print STDERR "$arg->{'contents'}->[0]->{'type'}\n"; |
| warn "Unknown leading space type $arg->{'contents'}->[0]->{'type'}\n" |
| if ($arg->{'contents'}->[0]->{'type'} ne 'empty_spaces_after_command' |
| and $arg->{'contents'}->[0]->{'type'} ne 'empty_spaces_before_argument' |
| # FIXME should we really catch this too? |
| and $arg->{'contents'}->[0]->{'type'} ne 'empty_line_after_command' |
| ); |
| $result[0] = $arg->{'contents'}->[0]->{'text'}; |
| return @result if (scalar(@{$arg->{'contents'}}) == 1); |
| } |
| if ($arg->{'contents'}) { |
| my $index = -1; |
| $index-- if ($arg->{'contents'}->[-1] |
| and $arg->{'contents'}->[-1]->{'cmdname'} |
| and ($arg->{'contents'}->[-1]->{'cmdname'} eq 'c' |
| or $arg->{'contents'}->[-1]->{'cmdname'} eq 'comment')); |
| if (scalar(@{$arg->{'contents'}}) + $index > 0) { |
| if ($arg->{'contents'}->[$index] |
| and defined($arg->{'contents'}->[$index]->{'text'}) |
| and $arg->{'contents'}->[$index]->{'text'} !~ /\S/ |
| and defined($arg->{'contents'}->[$index]->{'type'})) { |
| #print STDERR "$arg->{'contents'}->[$index]->{'type'}\n"; |
| warn "Unknown trailing space type $arg->{'contents'}->[$index]->{'type'}\n" |
| if ($arg->{'contents'}->[$index]->{'type'} ne 'spaces_at_end' |
| and $arg->{'contents'}->[$index]->{'type'} ne 'space_at_end_block_command' |
| ); |
| $result[1] = $arg->{'contents'}->[$index]->{'text'}; |
| } |
| } |
| } |
| return @result; |
| } |
| |
| sub _table_item_content_tree($$$) |
| { |
| my $self = shift; |
| my $root = shift; |
| my $contents = shift; |
| |
| my $converted_tree = {'parent' => $root}; |
| my $table_command = $root->{'parent'}->{'parent'}->{'parent'}; |
| if ($table_command->{'extra'} |
| and $table_command->{'extra'}->{'command_as_argument'}) { |
| my $command_as_argument |
| = $table_command->{'extra'}->{'command_as_argument'}; |
| my $command = {'cmdname' => $command_as_argument->{'cmdname'}, |
| 'line_nr' => $root->{'line_nr'}, |
| 'parent' => $converted_tree }; |
| if ($command_as_argument->{'type'} eq 'definfoenclose_command') { |
| $command->{'type'} = $command_as_argument->{'type'}; |
| $command->{'extra'}->{'begin'} = $command_as_argument->{'extra'}->{'begin'}; |
| $command->{'extra'}->{'end'} = $command_as_argument->{'extra'}->{'end'}; |
| } |
| my $arg = {'type' => 'brace_command_arg', |
| 'contents' => $contents, |
| 'parent' => $command,}; |
| $command->{'args'} = [$arg]; |
| $self->Texinfo::Parser::_register_command_arg($arg, 'brace_command_contents'); |
| $contents = [$command]; |
| } |
| $converted_tree->{'contents'} = $contents; |
| return $converted_tree; |
| } |
| |
| sub _level_corrected_section($$) |
| { |
| my $self = shift; |
| my $root = shift; |
| my $heading_level = $root->{'level'}; |
| my $command; |
| if ($heading_level ne $Texinfo::Common::command_structuring_level{$root->{'cmdname'}}) { |
| $command |
| = $Texinfo::Common::level_to_structuring_command{$root->{'cmdname'}}->[$heading_level]; |
| } else { |
| $command = $root->{'cmdname'}; |
| } |
| return $command; |
| } |
| |
| # generic output method |
| sub output_no_split($$) |
| { |
| my $self = shift; |
| my $root = shift; |
| |
| $self->_set_outfile(); |
| return undef unless $self->_create_destination_directory(); |
| |
| my $fh; |
| if (! $self->{'output_file'} eq '') { |
| $fh = $self->Texinfo::Common::open_out($self->{'output_file'}); |
| if (!$fh) { |
| $self->document_error(sprintf($self->__("could not open %s for writing: %s"), |
| $self->{'output_file'}, $!)); |
| return undef; |
| } |
| } |
| |
| $self->_set_global_multiple_commands(-1); |
| |
| if ($self->get_conf('USE_NODES')) { |
| return $self->convert_document_nodes($root, $fh); |
| } else { |
| return $self->convert_document_sections($root, $fh); |
| } |
| } |
| |
| # output fo $fh if defined, otherwise return the text. |
| sub _output_text($$$) |
| { |
| my $self = shift; |
| my $text = shift; |
| my $fh = shift; |
| if ($fh) { |
| print $fh $text; |
| return ''; |
| } else { |
| return $text; |
| } |
| } |
| |
| sub _convert_document_elements($$;$$) |
| { |
| my $self = shift; |
| my $root = shift; |
| my $elements = shift; |
| my $fh = shift; |
| |
| if ($elements) { |
| my $result = ''; |
| foreach my $element (@$elements) { |
| $result .= $self->_output_text($self->convert_tree($element), $fh); |
| } |
| return $result; |
| } else { |
| return $self->_output_text($self->convert_tree($root), $fh); |
| } |
| } |
| |
| sub convert_document_sections($$;$) |
| { |
| my $self = shift; |
| my $root = shift; |
| my $fh = shift; |
| |
| my $elements = Texinfo::Structuring::split_by_section($root); |
| return $self->_convert_document_elements($root, $elements, $fh); |
| } |
| |
| sub convert_document_nodes($$;$) |
| { |
| my $self = shift; |
| my $root = shift; |
| my $fh = shift; |
| |
| my $elements = Texinfo::Structuring::split_by_node($root); |
| return $self->_convert_document_elements($root, $elements, $fh); |
| } |
| |
| # if in this container, we are 'inline', within a running text |
| my @inline_types = ('def_line', 'paragraph', 'preformatted', |
| 'misc_command_arg', 'misc_line_arg', 'block_line_arg', |
| 'menu_entry_name', 'menu_entry_node'); |
| |
| my %inline_types; |
| foreach my $type (@inline_types) { |
| $inline_types{$type} = 1; |
| } |
| |
| my %not_inline_commands = (%Texinfo::Common::root_commands, |
| %Texinfo::Common::block_commands, %Texinfo::Common::context_brace_command); |
| |
| # Return 1 if inline in a running text, 0 if right in top-level or block |
| # environment, and undef otherwise. |
| sub _inline_or_block($$) |
| { |
| my $self = shift; |
| my $current = shift; |
| if ($current->{'type'} and $inline_types{$current->{'type'}}) { |
| return 1; |
| } elsif ($current->{'cmdname'} |
| and exists($not_inline_commands{$current->{'cmdname'}})) { |
| return 0; |
| } else { |
| return undef; |
| } |
| } |
| |
| # return true if in running text context |
| sub _is_inline($$) |
| { |
| my $self = shift; |
| my $current = shift; |
| while ($current->{'parent'}) { |
| $current = $current->{'parent'}; |
| my $inline_or_block = $self->_inline_or_block($current); |
| return ($inline_or_block) if (defined($inline_or_block)); |
| } |
| return 0; |
| } |
| |
| # return true if container or parent may hold running text |
| sub _in_inline($$) |
| { |
| my $self = shift; |
| my $current = shift; |
| my $inline_or_block = $self->_inline_or_block($current); |
| return ($inline_or_block) if (defined($inline_or_block)); |
| return $self->_is_inline($current); |
| } |
| |
| our %default_args_code_style = ( |
| 'email' => [1], |
| 'anchor' => [1], |
| 'uref' => [1], |
| 'url' => [1], |
| 'math' => [1], |
| 'inforef' => [1,undef,1], |
| 'image' => [1, 1, 1, undef, 1], |
| # and type? |
| 'float' => [1], |
| ); |
| |
| foreach my $code_style_command (keys(%Texinfo::Common::code_style_commands)) { |
| $default_args_code_style{$code_style_command} = [1]; |
| } |
| foreach my $ref_cmd ('pxref', 'xref', 'ref') { |
| $default_args_code_style{$ref_cmd} = [1, undef, undef, 1]; |
| } |
| |
| sub convert_accents($$$;$) |
| { |
| my $self = shift; |
| my $accent = shift; |
| my $format_accents = shift; |
| my $in_upper_case = shift; |
| |
| my ($contents, $stack) |
| = Texinfo::Common::find_innermost_accent_contents($accent); |
| my $result = $self->convert_tree({'contents' => $contents}); |
| |
| my $encoded; |
| if ($self->get_conf('ENABLE_ENCODING')) { |
| $encoded = Texinfo::Convert::Unicode::encoded_accents($self, $result, $stack, |
| $self->get_conf('OUTPUT_ENCODING_NAME'), |
| $format_accents, |
| $in_upper_case); |
| } |
| if (!defined($encoded)) { |
| foreach my $accent_command (reverse(@$stack)) { |
| $result = &$format_accents ($self, $result, $accent_command, |
| $in_upper_case); |
| } |
| return $result; |
| } else { |
| return $encoded; |
| } |
| } |
| |
| # This method allows to count words in elements and returns an array |
| # and a text already formatted. |
| sub sort_element_counts($$;$$) |
| { |
| my $converter = shift; |
| my $tree = shift; |
| my $use_sections = shift; |
| my $count_words = shift; |
| |
| my $elements; |
| if ($use_sections) { |
| $elements = Texinfo::Structuring::split_by_section($tree); |
| } else { |
| $elements = Texinfo::Structuring::split_by_node($tree); |
| } |
| |
| if (!$elements) { |
| @$elements = ($tree); |
| } elsif (scalar(@$elements) >= 1 |
| and (!$elements->[0]->{'extra'}->{'node'} |
| and !$elements->[0]->{'extra'}->{'section'})) { |
| shift @$elements; |
| } |
| |
| my $max_count = 0; |
| my @name_counts_array; |
| foreach my $element (@$elements) { |
| my $name = 'UNNAMED element'; |
| if ($element->{'extra'} |
| and ($element->{'extra'}->{'node'} or $element->{'extra'}->{'section'})) { |
| my $command = $element->{'extra'}->{'element_command'}; |
| if ($command->{'cmdname'} eq 'node') { |
| $name = Texinfo::Convert::Texinfo::convert({'contents' |
| => $command->{'extra'}->{'nodes_manuals'}->[0]->{'node_content'}}); |
| } else { |
| $name = "\@$command->{'cmdname'}" |
| .Texinfo::Convert::Texinfo::convert($command->{'args'}->[0]); |
| } |
| } |
| chomp($name); |
| my $count; |
| my $element_content = $converter->convert_tree($element); |
| if ($count_words) { |
| my @res = split /\W+/, $element_content; |
| $count = scalar(@res); |
| } else { |
| my @res = split /^/, $element_content; |
| $count = scalar(@res); |
| } |
| push @name_counts_array, [$count, $name]; |
| if ($count > $max_count) { |
| $max_count = $count; |
| } |
| } |
| |
| my @sorted_name_counts_array = sort {$a->[0] <=> $b->[0]} @name_counts_array; |
| @sorted_name_counts_array = reverse(@sorted_name_counts_array); |
| |
| my $max_length = length($max_count); |
| my $result = ''; |
| foreach my $sorted_count (@sorted_name_counts_array) { |
| $result .= sprintf("%${max_length}d $sorted_count->[1]\n", $sorted_count->[0]); |
| } |
| return (\@sorted_name_counts_array, $result); |
| } |
| |
| # XML related methods and variables that may be used in different |
| # XML Converters. |
| sub xml_protect_text($$) |
| { |
| my $self = shift; |
| my $text = shift; |
| if (!defined($text)) { |
| cluck; |
| } |
| $text =~ s/&/&/g; |
| $text =~ s/</</g; |
| $text =~ s/>/>/g; |
| $text =~ s/\"/"/g; |
| return $text; |
| } |
| |
| # 'today' is not set here. |
| our %default_xml_commands_formatting; |
| $default_xml_commands_formatting{'normal'} = { |
| 'TeX' => 'TeX', |
| 'LaTeX' => 'LaTeX', |
| 'bullet' => '•', |
| 'copyright' => '©', |
| 'registeredsymbol' => '®', |
| 'dots' => '…', |
| 'enddots' => '...', |
| 'equiv' => '≡', |
| # in general the following is not used since error |
| # appears in 'translated_commands' |
| 'error' => 'error-->', |
| 'expansion' => '→', |
| 'arrow' => '→', |
| 'click' => '→', |
| 'minus' => '-', |
| 'point' => '∗', |
| 'print' => '-|', |
| 'result' => '⇒', |
| 'aa' => 'å', |
| 'AA' => 'Å', |
| 'ae' => 'æ', |
| 'oe' => 'œ', #pertusus: also œ. œ not in html 3.2 |
| 'AE' => 'Æ', |
| 'OE' => 'Œ', #pertusus: also Œ. Œ not in html 3.2 |
| 'o' => 'ø', |
| 'O' => 'Ø', |
| 'ss' => 'ß', |
| 'DH' => 'Ð', |
| 'dh' => 'ð', |
| 'TH' => 'Þ', |
| 'th' => 'þ', |
| 'l' => 'ł', |
| 'L' => 'Ł', |
| 'exclamdown' => '¡', |
| 'questiondown' => '¿', |
| 'pounds' => '£', |
| 'ordf' => 'ª', |
| 'ordm' => 'º', |
| 'comma' => ',', |
| 'atchar' => '@', |
| 'lbracechar' => '{', |
| 'rbracechar' => '}', |
| 'backslashchar' => '\\', |
| 'hashchar' => '#', |
| 'euro' => '€', |
| 'geq' => '≥', |
| 'leq' => '≤', |
| 'tie' => ' ', |
| 'textdegree' => '°', |
| 'quotedblleft' => '“', |
| 'quotedblright' => '”', |
| 'quoteleft' => '‘', |
| 'quoteright' => '’', |
| 'quotedblbase' => '„', |
| 'quotesinglbase' => '‚', |
| 'guillemetleft' => '«', |
| 'guillemetright' => '»', |
| 'guillemotleft' => '«', |
| 'guillemotright' => '»', |
| 'guilsinglleft' => '‹', |
| 'guilsinglright' => '›', |
| }; |
| |
| foreach my $no_brace_command (keys(%Texinfo::Common::no_brace_commands)) { |
| $default_xml_commands_formatting{'normal'}->{$no_brace_command} |
| = $Texinfo::Common::no_brace_commands{$no_brace_command}; |
| } |
| |
| sub xml_comment($$) |
| { |
| my $self = shift; |
| my $text = shift; |
| chomp $text; |
| $text =~ s/--+/-/go; |
| return '<!--' . $text . ' -->' . "\n"; |
| } |
| |
| our %xml_accent_entities = ( |
| '"', 'uml', |
| '~', 'tilde', |
| '^', 'circ', |
| '`', 'grave', |
| "'", 'acute', |
| ",", 'cedil', |
| 'ringaccent', 'ring', |
| 'ogonek', 'ogon', |
| ); |
| |
| my %xml_accent_text_with_entities = ( |
| 'ringaccent' => 'aA', |
| "'" => 'aeiouyAEIOUY', |
| ',' => 'cC', |
| '^' => 'aeiouAEIOU', |
| '`' => 'aeiouAEIOU', |
| '~' => 'nNaoAO', |
| '"' => 'aeiouyAEIOU', |
| # according to http://www2.lib.virginia.edu/small/vhp/download/ISO.txt |
| # however this doesn't seems to work in firefox |
| # 'ogonek' => 'aeiuAEIU', |
| ); |
| |
| |
| sub xml_accent($$$;$$) |
| { |
| my $self = shift; |
| my $text = shift; |
| my $command = shift; |
| my $in_upper_case = shift; |
| my $use_numeric_entities = shift; |
| my $accent = $command->{'cmdname'}; |
| |
| if ($in_upper_case and $text =~ /^\w$/) { |
| $text = uc ($text); |
| } |
| |
| return "&${text}$xml_accent_entities{$accent};" |
| if (defined($xml_accent_entities{$accent}) |
| and defined($xml_accent_text_with_entities{$accent}) |
| and ($text =~ /^[$xml_accent_text_with_entities{$accent}]$/)); |
| if ($use_numeric_entities |
| and exists($Texinfo::Convert::Unicode::unicode_accented_letters{$accent}) |
| and exists($Texinfo::Convert::Unicode::unicode_accented_letters{$accent}->{$text})) { |
| return '&#' . |
| hex($Texinfo::Convert::Unicode::unicode_accented_letters{$accent}->{$text}). ';'; |
| } |
| return $text . '<' if ($accent eq 'v'); |
| # FIXME it is not possible to call xml_protect_text since what is in $text |
| # may already be xml. But this means that each time ascii_accent changes |
| # it should be changed here too. |
| return Texinfo::Convert::Text::ascii_accent($text, $command); |
| } |
| |
| sub _xml_accent_numeric_entities($$;$) |
| { |
| my $self = shift; |
| my $text = shift; |
| my $command = shift; |
| my $in_upper_case = shift; |
| return $self->xml_accent($text, $command, $in_upper_case, 1); |
| } |
| |
| sub xml_accents($$;$) |
| { |
| my $self = shift; |
| my $accent = shift; |
| my $in_upper_case = shift; |
| |
| my $format_accents; |
| if ($self->get_conf('USE_NUMERIC_ENTITY')) { |
| $format_accents = \&_xml_accent_numeric_entities; |
| } else { |
| $format_accents = \&xml_accent; |
| } |
| |
| return $self->convert_accents($accent, $format_accents, $in_upper_case); |
| } |
| |
| 1; |
| |
| __END__ |
| |
| =head1 NAME |
| |
| Texinfo::Convert::Converter - Parent class for Texinfo tree converters |
| |
| =head1 SYNOPSIS |
| |
| package Texinfo::Convert::MyConverter; |
| |
| use Texinfo::Convert::Converter; |
| @ISA = qw(Texinfo::Convert::Converter); |
| |
| sub converter_defaults ($$) { |
| return %myconverter_defaults; |
| } |
| sub converter_initialize($) { |
| my $self = shift; |
| $self->{'document_context'} = [{}]; |
| } |
| sub converter_global_commands($) { |
| return ('documentlanguage', documentencoding', 'paragraphindent'); |
| } |
| |
| sub convert($$) { |
| ... |
| } |
| sub convert_tree($$) { |
| ... |
| } |
| sub output($$) { |
| ... |
| } |
| |
| # end of Texinfo::Convert::MyConverter |
| |
| my $converter = Texinfo::Convert::MyConverter->converter( |
| {'parser' => $parser}); |
| $converter->output($texinfo_tree); |
| |
| =head1 DESCRIPTION |
| |
| Texinfo::Convert::Converter is a super class that can be used to |
| simplify converters initialization. The class also provide some |
| useful methods. |
| |
| In turn, the converter should define some methods. Three are |
| optional, C<converter_defaults>, C<converter_initialize> and |
| C<converter_global_commands> and used for initialization, to |
| give C<Texinfo::Convert::Converter> some informations. |
| |
| The C<convert_tree> method is more or less mandatory and should |
| convert portions of Texinfo tree. The C<output> and C<convert> |
| are not required, but customarily used by converters as entry |
| points for conversion to a file with headers and so on, or |
| conversion of a whole Texinfo tree. |
| |
| Existing backends may be used as examples that implement those |
| methods. C<Texinfo::Convert::Texinfo> together with |
| C<Texinfo::Convert::PlainTexinfo>, as well as |
| C<Texinfo::Convert::TextContent> are trivial examples. |
| C<Texinfo::Convert::Text> is less trivial, although still simplistic, |
| while C<Texinfo::Convert::DocBook> is a real converter |
| that is also not too complex. |
| |
| L<Texinfo::Common>, L<Texinfo::Convert::Unicode> |
| and L<Texinfo::Report> document modules or additional function |
| that may be useful for backends, while the parsed Texinfo tree is |
| described in L<Texinfo::Parser>. |
| |
| |
| =head1 METHODS |
| |
| =head2 Initialization |
| |
| A module subclassing C<Texinfo::Convert::Converter> is created by calling |
| the C<converter> method that should be inherited from |
| C<Texinfo::Convert::Converter>. |
| |
| =over |
| |
| =item $converter = MyConverter->converter($options) |
| |
| The I<$options> hash reference holds options for the converter. In |
| this option hash reference a L<parser object|Texinfo::Parser> |
| may be associated with the I<parser> key. The other options |
| should be configuration options described in the Texinfo manual. |
| Those options, when appropriate, override the document content. |
| |
| The C<converter> function returns a converter object (a blessed hash |
| reference) after checking the options and performing some initializations, |
| especially when a parser is given among the options. The converter is |
| also initialized as a L<Texinfo::Report>. |
| |
| =back |
| |
| To help with these initializations, the modules can define three methods: |
| |
| =over |
| |
| =item %defaults = $converter->converter_defaults($options) |
| |
| The converter can provide a defaults hash for configuration options. |
| The I<$options> hash reference holds options for the converter. |
| |
| =item @global_commands = $converter->converter_global_commands() |
| |
| The list returned is the list of Texinfo global commands (like |
| C<@paragraphindent>, C<@documentlanguage>...) that are relevant for the |
| converter. |
| |
| =item converter_initialize |
| |
| This method is called at the end of the converter initialization. |
| |
| =back |
| |
| =head2 Helper methods |
| |
| C<Texinfo::Convert::Converter> provides methods |
| that may be useful for every converter: |
| |
| =over |
| |
| =item $converter->get_conf($option_string) |
| |
| Returns the value of the Texinfo configuration option I<$option_string>. |
| |
| =item $converter->set_conf($option_string, $value) |
| |
| Set the Texinfo configuration option I<$option_string> to I<$value> if |
| not set as a converter option. |
| |
| =item $converter->force_conf($option_string, $value) |
| |
| Set the Texinfo configuration option I<$option_string> to I<$value>. |
| This should rarely be used, but the purpose of this method is to be able |
| to revert a configuration that is always wrong for a given output |
| format, like the splitting for example. |
| |
| =item $result = $converter->convert_document_sections($root, $file_handler) |
| |
| This method splits the I<$root> Texinfo tree at sections and |
| calls C<convert_tree> on the elements. If the optional I<$file_handler> |
| is given in argument, the result are output in I<$file_handler>, otherwise |
| the resulting string is returned. |
| |
| =item $result = $converter->convert_accents($accent_command, \&format_accents, $in_upper_case) |
| |
| I<$accent_command> is an accent command, which may have other accent |
| commands nested. The function returns the accents formatted either |
| as encoded letters, or formatted using I<\&format_accents>. |
| If I<$in_upper_case> is set, the result should be uppercased. |
| |
| =back |
| |
| Other C<Texinfo::Convert::Converter> methods target conversion to XML: |
| |
| =over |
| |
| =item $protected_text = $converter->xml_protect_text($text) |
| |
| Protect special XML characters (&, E<lt>, E<gt>, ") of I<$text>. |
| |
| =item $comment = $converter->xml_comment($text) |
| |
| Returns an XML comment for I<$text>. |
| |
| =item $result = xml_accent($text, $accent_command, $in_upper_case, $use_numeric_entities) |
| |
| I<$text> is the text appearing within an accent command. I<$accent_command> |
| should be a Texinfo tree element corresponding to an accent command taking |
| an argument. I<$in_upper_case> is optional, and, if set, the text is put |
| in upper case. The function returns the accented letter as XML entity |
| if possible. I<$use_numeric_entities> is also optional, and, if set, and |
| there is no XML entity, the numerical entity corresponding to Unicode |
| points is preferred to an ASCII transliteration. |
| |
| =item $result = $converter->xml_accents($accent_command, $in_upper_case) |
| |
| I<$accent_command> is an accent command, which may have other accent |
| commands nested. If I<$in_upper_case> is set, the result should be |
| upper cased. The function returns the accents formatted as XML. |
| |
| =back |
| |
| Finally, there is: |
| |
| =item $result = $converter->output_internal_links() |
| |
| At this level, the method just returns undef. It is used in the HTML |
| output, following the C<--internal-links> option of texi2any/makeinfo |
| specification. |
| |
| =back |
| |
| =head1 SEE ALSO |
| |
| L<Texinfo::Common>, L<Texinfo::Convert::Unicode>, L<Texinfo::Report> |
| and L<Texinfo::Parser>. |
| |
| =head1 AUTHOR |
| |
| Patrice Dumas, E<lt>pertusus@free.frE<gt> |
| |
| =head1 COPYRIGHT AND LICENSE |
| |
| Copyright 2011, 2012, 2013, 2014, 2015 Free Software Foundation, Inc. |
| |
| This library is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3 of the License, or (at |
| your option) any later version. |
| |
| =cut |