# blob: bfa18245233ac970ab7c1f72b03910b86c9ce2e4 [file] [log] [blame]
# texindex.awk, generated by jrtangle from ti.twjr.
#
# Copyright 2014, 2015, 2016 Free Software Foundation, Inc.
#
# This file is part of GNU Texinfo.
#
# Texinfo is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Texinfo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
# ftrans.awk --- handle data file transitions
#
# user supplies beginfile() and endfile() functions
#
# Arnold Robbins, arnold@skeeve.com, Public Domain
# November 1992
# First record of an input file: finish off the previous file, if there
# was one, then remember the new file's name and start processing it.
FNR == 1 {
    if (_filename_ != "") {
        # _filename_ still names the file that was just read
        endfile(_filename_)
    }
    beginfile(_filename_ = FILENAME)
}
# Once all input has been consumed, finish off the file recorded in
# _filename_ by the FNR == 1 rule.
END {
    endfile(_filename_)
}
# join.awk --- join an array into a string
#
# Arnold Robbins, arnold@skeeve.com, Public Domain
# May 1993
# Concatenate array[start] .. array[end] and return the resulting string.
#
# sep goes between consecutive elements.  An empty (or omitted) sep
# means a single space; passing SUBSEP is the magic value that asks for
# no separator at all.  result and i are locals.
function join(array, start, end, sep, result, i)
{
    # normalize the separator
    sep = (sep == "") ? " " : ((sep == SUBSEP) ? "" : sep)

    result = array[start]
    i = start
    while (++i <= end)
        result = result sep array[i]
    return result
}
# quicksort --- C.A.R. Hoare's quick sort algorithm. See Wikipedia
# or almost any algorithms or computer science text
# Adapted from K&R-II, page 110
#
# Sort data[left] .. data[right] in place.  The ordering is decided by
# less_than() and elements are exchanged with quicksort_swap().
# i and last are locals.
function quicksort(data, left, right, i, last)
{
    # a range holding fewer than two elements needs no work
    if (left < right) {
        # bring the middle element to the front to act as the pivot
        quicksort_swap(data, left, int((left + right) / 2))

        # collect everything that sorts before the pivot right after it;
        # `last' marks the end of that group
        last = left
        i = left
        while (++i <= right) {
            if (less_than(data[i], data[left]))
                quicksort_swap(data, ++last, i)
        }

        # drop the pivot between the two groups, then sort each group
        quicksort_swap(data, left, last)
        quicksort(data, left, last - 1)
        quicksort(data, last + 1, right)
    }
}
# quicksort_swap --- quicksort helper function, could be inline
#
# Exchange data[i] and data[j]; temp is a local holding one of the
# values while the other is moved.
function quicksort_swap(data, i, j, temp)
{
    temp = data[j]
    data[j] = data[i]
    data[i] = temp
}
# del_array --- remove every element from the array a
function del_array(a)
{
    # Splitting the empty string into `a' leaves it with no elements.
    # This is portable, and faster than the element-by-element loop
    #     for (i in a)
    #         delete a[i]
    split("", a)
}
# check_split_null --- find out whether split() with an empty separator
# breaks a string into its individual characters in this awk
#
# Returns 1 if the five-character probe string yields five pieces,
# 0 otherwise.  n and a are locals.
function check_split_null( n, a)
{
    n = split("abcde", a, "")
    if (n == 5)
        return 1
    return 0
}
# char_split --- store the characters of string in array[1] .. array[n]
# and return n, the number of characters
#
# Uses split() with an empty separator when Can_split_null says that
# works; otherwise the characters are extracted one by one.
# n and i are locals.
function char_split(string, array, n, i)
{
    if (! Can_split_null) {
        # do it the hard way
        del_array(array)
        n = length(string)
        i = 0
        while (++i <= n)
            array[i] = substr(string, i, 1)
        return n
    }
    return split(string, array, "")
}
# fatal --- print a formatted message on /dev/stderr, then exit with
# status EXIT_FAILURE
#
# format is a printf format string; arg1 .. arg10 are the (optional)
# values it consumes.
function fatal(format, arg1, arg2, arg3, arg4, arg5,
    arg6, arg7, arg8, arg9, arg10)
{
    printf(format,
           arg1, arg2, arg3, arg4, arg5,
           arg6, arg7, arg8, arg9, arg10) > "/dev/stderr"
    exit EXIT_FAILURE
}
# isupper --- is c one of the letters A-Z?
#
# Returns the position of c within the upper-case alphabet (non-zero,
# hence true) or 0 when c does not occur in it.
function isupper(c)
{
    return index("ABCDEFGHIJKLMNOPQRSTUVWXYZ", c)
}
# islower --- is c one of the letters a-z?
#
# Returns the position of c within the lower-case alphabet (non-zero,
# hence true) or 0 when c does not occur in it.
function islower(c)
{
    return index("abcdefghijklmnopqrstuvwxyz", c)
}
# isalpha --- is c a letter, according to islower() or isupper()?
#
# Returns 1 if so, 0 otherwise.
function isalpha(c)
{
    if (islower(c))
        return 1
    return (isupper(c) != 0)
}
# isdigit --- is c one of the digits 0-9?
#
# Returns the position of c within "0123456789" (non-zero, hence true)
# or 0 when c does not occur in it.
function isdigit(c)
{
    return index("0123456789", c)
}
# usage --- print the help text on standard output, then exit with
# status exit_val
function usage(exit_val)
{
    # synopsis
    printf(_"Usage: %s [OPTION]... FILE...\n", Invocation_name)
    print _"Generate a sorted index for each TeX output FILE."
    print _"Usually FILE... is specified as `foo.??' for a document `foo.texi'."
    print ""

    # option summary
    print _"Options:"
    print _" -h, --help display this help and exit"
    print _" --version display version information and exit"
    print _" -- end option processing"
    print ""

    # where to get help; the continuation lines belong to one string
    # and therefore must start in the first column
    print _"Email bug reports to bug-texinfo@gnu.org,\n\
general questions and discussion to help-texinfo@gnu.org.\n\
Texinfo home page: http://www.gnu.org/software/texinfo/"

    exit exit_val
}
# version --- print the program name, Texindex_version and the license
# notice on standard output, then exit with status EXIT_SUCCESS
function version()
{
    print "texindex (GNU texinfo)", Texindex_version
    print ""

    # the continuation lines belong to one format string and therefore
    # must start in the first column
    printf _"Copyright (C) %s Free Software Foundation, Inc.\n\
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n\
This is free software: you are free to change and redistribute it.\n\
There is NO WARRANTY, to the extent permitted by law.\n", "2016"

    exit EXIT_SUCCESS
}
# Program start-up: define the constants used throughout, probe the awk
# implementation, and process the command-line options.
BEGIN {
    TRUE = 1
    FALSE = 0
    EXIT_SUCCESS = 0
    EXIT_FAILURE = 1

    Texindex_version = "6.1"
    if (! Invocation_name) {
        # provide fallback in case it's not passed in.
        Invocation_name = "texindex"
    }

    # remember whether split(s, a, "") yields single characters
    Can_split_null = check_split_null()

    TEXTDOMAIN = "texinfo"

    # Handle leading options.  Options that are consumed are deleted
    # from ARGV so that awk does not treat them as input files; the
    # first argument that is not an option ends the scan.
    for (i = 1; i < ARGC; i++) {
        if (ARGV[i] == "-h" || ARGV[i] == "--help") {
            usage(EXIT_SUCCESS)
        } else if (ARGV[i] == "--version") {
            version()
        } else if (ARGV[i] == "-k" || ARGV[i] == "--keep") {
            # do nothing, backwards compatibility
            delete ARGV[i]
        } else if (ARGV[i] == "--") {
            # explicit end of options
            delete ARGV[i]
            break
        } else if (ARGV[i] ~ /^--?.+/) {
            # fatal() exits by itself, so no further exit is needed here
            fatal(_"%s: unrecognized option `%s'\n" \
                "Try `%s --help' for more information.\n",
                Invocation_name, ARGV[i], Invocation_name)
        } else {
            break
        }
    }
}
# beginfile --- prepare to process the index file `filename'
#
# Resets all per-file state, derives the output file name by appending
# "s" to the input name, and checks that the current record ($0) starts
# with \entry or @entry; if it does not, fatal() is called.
function beginfile(filename)
{
    # Reinitialize these for each input file
    Entries = 0
    Do_initials = FALSE
    Prev_initial = ""
    del_array(Seen)
    del_array(Keys)
    del_array(Data)

    Output_file = filename "s"
    Special_chars = "{}"

    # the first character of the file is taken to be the command character
    Command_char = substr($0, 1, 1)
    if (substr($0, 2, 5) != "entry" \
        || (Command_char != "\\" && Command_char != "@"))
        fatal(_"%s is not a Texinfo index file\n", filename)
}
# endfile --- sort the collected entries and write them to Output_file
#
# The filename parameter is not used; i, prev_initial and initial are
# locals.
function endfile(filename, i, prev_initial, initial)
{
    # sort the entries
    quicksort(Keys, 1, Entries)

    i = 0
    while (++i <= Entries) {
        # whenever the initial changes, announce the new one
        initial = Data[Keys[i], "initial"]
        if (initial != prev_initial)
            print_initial(prev_initial = initial)

        # write the actual line \entry {...}{...}
        printf("%centry {%s}{%s}\n",
               Command_char,
               Data[Keys[i], "text"],
               Data[Keys[i], "pagenum"]) > Output_file
    }

    close(Output_file)
}
# ends_with --- return 1 if string ends with suffix, 0 otherwise
#
# The comparison is literal: characters in suffix that are special in
# regular expressions get no special treatment.  start is a local.
function ends_with(string, suffix, start)
{
    start = length(string) - length(suffix) + 1
    if (start < 1)
        return 0    # string is shorter than suffix
    return (substr(string, start) == suffix)
}
# Main rule, run for every input record: parse one index entry and
# record it in Keys / Data for endfile() to sort and print.
{
    # Remove duplicates, which can happen
    if ($0 in Seen)
        next
    Seen[$0] = TRUE

    $0 = substr($0, 7)    # remove leading \entry
    initial = extract_initial($0)
    numfields = field_split($0, fields, "{", "}", Command_char)
    if (numfields != 3)
        fatal(_"%s:%d: Bad entry; expected three fields, not %d\n",
              FILENAME, FNR, numfields)
    key = fields[1]
    pagenum = fields[2]
    text = fields[3]

    if (! ((key, "text") in Data)) {
        # first time we've seen this key
        Keys[++Entries] = key
        Data[key, "text"] = text
        Data[key, "pagenum"] = pagenum
        Data[key, "initial"] = initial
    } else if (Data[key, "pagenum"] != pagenum \
               && ! ends_with(Data[key, "pagenum"], ", " pagenum)) {
        # seen this key before; add the current pagenum
        # unless it is already the last one recorded.  The page number
        # is compared literally, never used as a regular expression.
        Data[key, "pagenum"] = Data[key, "pagenum"] ", " pagenum
    }

    # Initials are only printed if the file has more than one of them
    if (! Do_initials) {
        if (Prev_initial == "")
            Prev_initial = initial
        else if (initial != Prev_initial)
            Do_initials = TRUE
    }
}
# extract_initial --- return the "initial" under which an entry is filed
#
# key is the record with the leading \entry removed, so its first
# character is presumably the "{" that opens the sort key -- TODO confirm.
# The initial is derived from what follows that character:
#   - a brace group: the whole balanced group, braces included
#   - the command character: the character after it, upper-cased
#   - anything else: that character, upper-cased
# Calls fatal() if the scan of a brace group reaches the end of key.
# All parameters after key are locals.
function extract_initial(key, initial, nextchar, i, l, kchars, bracecount)
{
    l = char_split(key, kchars)
    if (l >= 3 && kchars[2] == "{") {
        # find the brace that balances the one at position 2
        bracecount = 1
        i = 3
        while (bracecount > 0 && i <= l) {
            if (kchars[i] == "{")
                bracecount++
            else if (kchars[i] == "}")
                bracecount--
            i++
        }
        if (i > l)
            fatal(_"%s:%d: Bad key %s in record\n", FILENAME, FNR, key)
        # i is now one past the balancing brace
        initial = substr(key, 2, i - 2)
    } else if (kchars[2] == Command_char) {
        # An escaped character: file it under the character itself.
        # print_initial() puts the command character back in front of
        # the characters listed in Special_chars.
        nextchar = kchars[3]
        initial = toupper(nextchar)
    } else {
        initial = toupper(kchars[2])
    }
    return initial
}
# field_split --- break record into fields enclosed in start ... end
#
# Parameters:
#   record  the text to split; scanning begins at its second character
#           with one delimiter already counted as open, so the first
#           character is presumably an opening delimiter -- TODO confirm
#   fields  array that receives the fields (emptied first)
#   start   opening delimiter character
#   end     closing delimiter character
#   com_ch  command (escape) character
#
# Returns j, the index into fields: 3 once the third field has been
# stored, otherwise one more than the number of fields completed.
# Calls fatal() if a completed field is followed by anything other than
# start.
function field_split( \
record, fields, start, end, com_ch, # parameters
chars, numchars, out, delim_count, i, j, k) # locals
{
del_array(fields)
numchars = char_split(record, chars)
j = 1 # index into fields
k = 1 # index into out
delim_count = 1
for (i = 2; i <= numchars; i++) {
if (chars[i] == com_ch) {
# com_ch followed by one of Special_chars: keep only that
# character and skip over it; otherwise keep com_ch itself
if (index(Special_chars, chars[i+1]) != 0) {
out[k++] = chars[i+1]
i++
} else
out[k++] = chars[i]
} else if (chars[i] == start) {
# nested opening delimiter: count it and keep it
delim_count++
out[k++] = chars[i]
} else if (chars[i] == end) {
delim_count--
if (delim_count == 0) {
# the outermost delimiter closed: the field is complete;
# SUBSEP makes join() concatenate without a separator
fields[j++] = join(out, 1, k-1, SUBSEP)
del_array(out) # reset for next time through
k = 1
# step to the character after the closing delimiter, which
# must open the next field unless the record has ended
i++
if (i <= numchars && chars[i] != start)
fatal(_"%s:%d: Bad entry; expected %s at column %d\n",
FILENAME, FNR, start, i)
delim_count = 1
} else
# nested closing delimiter: keep it
out[k++] = chars[i]
} else
out[k++] = chars[i]
if (j == 3) { # Per Karl, just grab the whole rest of the line
# extract everything between the outer delimiters
# (copied straight from record, without the processing above)
fields[3] = substr(record, i + 1, numchars - i - 1)
break
}
}
return j # num fields
}
# print_initial --- write an "initial" line to Output_file
#
# Nothing is written unless Do_initials is set.  An initial that occurs
# in Special_chars is preceded by the command character.
function print_initial(initial)
{
    if (! Do_initials)
        return

    if (index(Special_chars, initial) != 0)
        initial = Command_char initial

    printf("%cinitial {%s}\n", Command_char, initial) > Output_file
}
# Build Ordval, the table that maps a character to its sorting value.
# Digits and letters are given their character code plus 512; every
# other character keeps its character code.
BEGIN {
    for (i = 0; i < 256; i++) {
        c = sprintf("%c", i)
        n = isdigit(c)
        # map character to value, moving the digits up by 512
        Ordval[c] = (n > 0) ? i + 512 : i
    }

    # Give both 'A' and 'a' the same code
    j = Ordval["Z"]
    for (i = Ordval["A"]; i <= j; i++) {
        c = sprintf("%c", i)
        # In ASCII, 'A' is before 'a', so this is
        # the right order to check
        #
        # Checking isupper() lets this work for EBCDIC, too.
        if (! isupper(c))
            continue
        Ordval[c] += 512
        Ordval[tolower(c)] = Ordval[c]
    }
}
# less_than --- comparison function used by quicksort()
#
# Returns 1 if the string left sorts before the string right, and 0
# otherwise.  The strings are compared character by character using the
# values in Ordval; if one string is a prefix of the other, the shorter
# one sorts first.  All parameters after right are locals, including
# the loop index i, so that the caller's variables are left alone.
function less_than(left, right, len_l, len_r, len, chars_l, chars_r, i)
{
    len_l = length(left)
    len_r = length(right)
    len = (len_l < len_r ? len_l : len_r)

    char_split(left, chars_l)
    char_split(right, chars_r)

    for (i = 1; i <= len; i++) {
        if (isalpha(chars_l[i]) && isalpha(chars_r[i])) {
            # same char different case
            # upper case comes out last
            if (chars_l[i] != chars_r[i] &&
                tolower(chars_l[i]) == tolower(chars_r[i])) {
                # if a letter follows in either string, treat this
                # position as equal and let the later characters decide
                if (   i != len \
                    && (isalpha(chars_l[i+1]) \
                        || isalpha(chars_r[i+1])))
                    continue

                if (chars_l[i] > chars_r[i])
                    return 1
                return 0
            }

            # same case, different char,
            # or different case, different char:
            # letter order wins
            if (Ordval[chars_l[i]] < Ordval[chars_r[i]])
                return 1
            if (Ordval[chars_l[i]] > Ordval[chars_r[i]])
                return 0

            # equal, keep going
            continue
        }

        # letter and something else, or two non-letters
        # letter order wins
        if (Ordval[chars_l[i]] < Ordval[chars_r[i]])
            return 1
        if (Ordval[chars_l[i]] > Ordval[chars_r[i]])
            return 0

        # equal, keep going
    }

    # equal so far, shorter one wins
    if (len_l < len_r)
        return 1
    return 0
}