| '\" |
| '\" Copyright (c) 1993 The Regents of the University of California. |
| '\" Copyright (c) 1994-1996 Sun Microsystems, Inc. |
| '\" |
| '\" See the file "license.terms" for information on usage and redistribution |
| '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. |
| '\" |
| '\" RCS: @(#) $Id: string.n,v 1.17 2002/07/05 07:23:45 hobbs Exp $ |
| '\" |
| '\" The definitions below are for supplemental macros used in Tcl/Tk |
| '\" manual entries. |
| '\" |
| '\" .AP type name in/out ?indent? |
| '\" Start paragraph describing an argument to a library procedure. |
| '\" type is type of argument (int, etc.), in/out is either "in", "out", |
| '\" or "in/out" to describe whether procedure reads or modifies arg, |
| '\" and indent is equivalent to second arg of .IP (shouldn't ever be |
| '\" needed; use .AS below instead) |
| '\" |
| '\" .AS ?type? ?name? |
| '\" Give maximum sizes of arguments for setting tab stops. Type and |
| '\" name are examples of largest possible arguments that will be passed |
| '\" to .AP later. If args are omitted, default tab stops are used. |
| '\" |
| '\" .BS |
| '\" Start box enclosure. From here until next .BE, everything will be |
| '\" enclosed in one large box. |
| '\" |
| '\" .BE |
| '\" End of box enclosure. |
| '\" |
| '\" .CS |
| '\" Begin code excerpt. |
| '\" |
| '\" .CE |
| '\" End code excerpt. |
| '\" |
| '\" .VS ?version? ?br? |
| '\" Begin vertical sidebar, for use in marking newly-changed parts |
| '\" of man pages. The first argument is ignored and used for recording |
| '\" the version when the .VS was added, so that the sidebars can be |
| '\" found and removed when they reach a certain age. If another argument |
| '\" is present, then a line break is forced before starting the sidebar. |
| '\" |
| '\" .VE |
| '\" End of vertical sidebar. |
| '\" |
| '\" .DS |
| '\" Begin an indented unfilled display. |
| '\" |
| '\" .DE |
| '\" End of indented unfilled display. |
| '\" |
| '\" .SO |
| '\" Start of list of standard options for a Tk widget. The |
| '\" options follow on successive lines, in four columns separated |
| '\" by tabs. |
| '\" |
| '\" .SE |
| '\" End of list of standard options for a Tk widget. |
| '\" |
| '\" .OP cmdName dbName dbClass |
| '\" Start of description of a specific option. cmdName gives the |
| '\" option's name as specified in the class command, dbName gives |
| '\" the option's name in the option database, and dbClass gives |
| '\" the option's class in the option database. |
| '\" |
| '\" .UL arg1 arg2 |
| '\" Print arg1 underlined, then print arg2 normally. |
| '\" |
| '\" RCS: @(#) $Id: man.macros,v 1.4 2000/08/25 06:18:32 ericm Exp $ |
| '\" |
| '\" # Set up traps and other miscellaneous stuff for Tcl/Tk man pages. |
| '\" # troff only: plant a trap 1.3i above the page bottom that runs ^B. |
| '\" # ^l = line length in effect at the time these macros are read. |
| '\" # Text is adjusted on both margins. |
| .if t .wh -1.3i ^B |
| .nr ^l \n(.l |
| .ad b |
| '\" # AP - start an argument description |
| '\" # Arguments: 1 = type, 2 = name, 3 = in/out, 4 = optional indent. |
| '\" # The paragraph indent is argument 4 if given, else register )C |
| '\" # when a name is given, else 15. Tab stops are taken from the |
| '\" # registers )A and )B, which .AS computes. |
| '\" # With argument 3 present the tag reads: type, italic name, (in/out). |
| '\" # Without it a break is forced and the tag is: type, italic name, |
| '\" # or just the first argument in italics when no name is given. |
| '\" # NOTE(review): the field separators on the text lines are |
| '\" # presumably tabs (the .ta stops only affect tabs) - confirm. |
| .de AP |
| .ie !"\\$4"" .TP \\$4 |
| .el \{\ |
| . ie !"\\$2"" .TP \\n()Cu |
| . el .TP 15 |
| .\} |
| .ta \\n()Au \\n()Bu |
| .ie !"\\$3"" \{\ |
| \&\\$1 \\fI\\$2\\fP (\\$3) |
| .\".b |
| .\} |
| .el \{\ |
| .br |
| .ie !"\\$2"" \{\ |
| \&\\$1 \\fI\\$2\\fP |
| .\} |
| .el \{\ |
| \&\\fI\\$1\\fP |
| .\} |
| .\} |
| .. |
| '\" # AS - define tabbing values for .AP |
| '\" # )A = first tab stop: width of argument 1 plus 3n |
| '\" #      (10n if argument 1 is omitted). |
| '\" # )B = second tab stop: )A plus width of argument 2 plus 3n |
| '\" #      ()A plus 15n if argument 2 is omitted). |
| '\" # )C = paragraph indent used by .AP: )B plus the width of |
| '\" #      "(in/out)" plus 2n. |
| '\" # Only the first two arguments are read by the macro. |
| '\" # The call after the definition establishes default values for |
| '\" # all three registers. |
| .de AS |
| .nr )A 10n |
| .if !"\\$1"" .nr )A \\w'\\$1'u+3n |
| .nr )B \\n()Au+15n |
| .\" |
| .if !"\\$2"" .nr )B \\w'\\$2'u+\\n()Au+3n |
| .nr )C \\n()Bu+\\w'(in/out)'u+2n |
| .. |
| .AS Tcl_Interp Tcl_CreateInterp in/out |
| '\" # BS - start boxed text |
| '\" # ^y = starting y location |
| '\" # ^b = 1 |
| '\" # Forces a break, records the current vertical position in ^y and |
| '\" # sets ^b to 1 to flag that a box is open. |
| '\" # nroff only: also prints a rule of underscores as wide as the |
| '\" # current line length, unfilled and without indent, as the box top. |
| .de BS |
| .br |
| .mk ^y |
| .nr ^b 1u |
| .if n .nf |
| .if n .ti 0 |
| .if n \l'\\n(.lu\(ul' |
| .if n .fi |
| .. |
| '\" # BE - end boxed text (draw box now) |
| '\" # ^t = ending y location (marked here) |
| '\" # nroff: print a rule of underscores of length ^l to close the box. |
| '\" # troff: draw the box outline between ^y and ^t; the top edge is |
| '\" # drawn only when ^b is 1, i.e. the box was not carried across a |
| '\" # page break by ^B (which sets ^b to 2). |
| '\" # Afterwards fill mode is restored and ^b is reset to 0. |
| .de BE |
| .nf |
| .ti 0 |
| .mk ^t |
| .ie n \l'\\n(^lu\(ul' |
| .el \{\ |
| .\" Draw four-sided box normally, but don't draw top of |
| .\" box if the box started on an earlier page. |
| .ie !\\n(^b-1 \{\ |
| \h'-1.5n'\L'|\\n(^yu-1v'\l'\\n(^lu+3n\(ul'\L'\\n(^tu+1v-\\n(^yu'\l'|0u-1.5n\(ul' |
| .\} |
| .el \{\ |
| \h'-1.5n'\L'|\\n(^yu-1v'\h'\\n(^lu+3n'\L'\\n(^tu+1v-\\n(^yu'\l'|0u-1.5n\(ul' |
| .\} |
| .\} |
| .fi |
| .br |
| .nr ^b 0 |
| .. |
| '\" # VS - start vertical sidebar |
| '\" # ^Y = starting y location |
| '\" # ^v = 1 (for troff; for nroff this doesn't matter) |
| '\" # The first argument is not referenced; a non-empty second |
| '\" # argument forces a line break before the sidebar starts. |
| '\" # nroff: switch on a margin character (box rule at size 12). |
| '\" # troff: only set ^v here; the bar itself is drawn by VE or ^B. |
| .de VS |
| .if !"\\$2"" .br |
| .mk ^Y |
| .ie n 'mc \s12\(br\s0 |
| .el .nr ^v 1u |
| .. |
| '\" # VE - end of vertical sidebar |
| '\" # nroff: the margin character is switched off again. |
| '\" # troff: working in environment 2, unfilled and without indent, |
| '\" # mark the current position in ^t and draw a vertical bar at |
| '\" # horizontal position ^l+3n between the position saved in ^Y and |
| '\" # the current position; the following .sp -1 presumably cancels |
| '\" # the output line used for the drawing. The previous environment |
| '\" # is then restored. |
| '\" # ^v is reset to 0 in both cases. |
| .de VE |
| .ie n 'mc |
| .el \{\ |
| .ev 2 |
| .nf |
| .ti 0 |
| .mk ^t |
| \h'|\\n(^lu+3n'\L'|\\n(^Yu-1v\(bv'\v'\\n(^tu+1v-\\n(^Yu'\h'-|\\n(^lu+3n' |
| .sp -1 |
| .fi |
| .ev |
| .\} |
| .nr ^v 0 |
| .. |
| '\" # ^B - special macro to handle page bottom: finish off the current |
| '\" # box/sidebar if in box/sidebar mode, then start a new page. |
| '\" # Runs in environment 2, unfilled and without indent; ^t marks |
| '\" # the position at which the page is being ended. |
| '\" # If a box is open (^b non-zero) its sides are drawn down to ^t, |
| '\" # with the top edge only when ^b is 1. If a sidebar is open |
| '\" # (^v non-zero) a bar of length ^x = ^t + 1v - ^Y is drawn. |
| '\" # After the page break an open box gets a fresh ^y and ^b = 2, so |
| '\" # that its top is not drawn again, and an open sidebar gets a |
| '\" # fresh ^Y. |
| .de ^B |
| .ev 2 |
| 'ti 0 |
| 'nf |
| .mk ^t |
| .if \\n(^b \{\ |
| .\" Draw three-sided box if this is the box's first page, |
| .\" draw two sides but no top otherwise. |
| .ie !\\n(^b-1 \h'-1.5n'\L'|\\n(^yu-1v'\l'\\n(^lu+3n\(ul'\L'\\n(^tu+1v-\\n(^yu'\h'|0u'\c |
| .el \h'-1.5n'\L'|\\n(^yu-1v'\h'\\n(^lu+3n'\L'\\n(^tu+1v-\\n(^yu'\h'|0u'\c |
| .\} |
| .if \\n(^v \{\ |
| .nr ^x \\n(^tu+1v-\\n(^Yu |
| \kx\h'-\\nxu'\h'|\\n(^lu+3n'\ky\L'-\\n(^xu'\v'\\n(^xu'\h'|0u'\c |
| .\} |
| .bp |
| 'fi |
| .ev |
| .if \\n(^b \{\ |
| .mk ^y |
| .nr ^b 2 |
| .\} |
| .if \\n(^v \{\ |
| .mk ^Y |
| .\} |
| .. |
| '\" # DS - begin display |
| '\" # Opens an indented region with .RS, turns filling off and leaves |
| '\" # one blank line before the display text. |
| '\" # .RS is not defined in this file; presumably it is supplied by |
| '\" # the man macro package - confirm. |
| .de DS |
| .RS |
| .nf |
| .sp |
| .. |
| '\" # DE - end display |
| '\" # Turns filling back on, closes the indented region with .RE and |
| '\" # leaves one blank line after the display. |
| .de DE |
| .fi |
| .RE |
| .sp |
| .. |
| '\" # SO - start of list of standard options |
| '\" # Emits a "STANDARD OPTIONS" section heading and a new paragraph, |
| '\" # then switches to unfilled text in the bold font with tab stops |
| '\" # at 5.5c and 11c. |
| '\" # NOTE(review): two tab stops give three tab-aligned columns, while |
| '\" # the summary at the top of this file speaks of four columns - |
| '\" # confirm which is intended. |
| .de SO |
| .SH "STANDARD OPTIONS" |
| .LP |
| .nf |
| .ta 5.5c 11c |
| .ft B |
| .. |
| '\" # SE - end of list of standard options |
| '\" # Restores filled text and the roman font, starts a new paragraph |
| '\" # and prints a pointer to the "options" manual entry. |
| .de SE |
| .fi |
| .ft R |
| .LP |
| See the \\fBoptions\\fR manual entry for details on the standard options. |
| .. |
| '\" # OP - start of full description for a single option |
| '\" # Arguments: 1 = command-line name, 2 = database name, |
| '\" # 3 = database class. |
| '\" # Prints the three names in bold, one per unfilled line, with a |
| '\" # tab stop at 4c, then restores filling and issues .IP so that the |
| '\" # description text that follows the call is indented. |
| .de OP |
| .LP |
| .nf |
| .ta 4c |
| Command-Line Name: \\fB\\$1\\fR |
| Database Name: \\fB\\$2\\fR |
| Database Class: \\fB\\$3\\fR |
| .fi |
| .IP |
| .. |
| '\" # CS - begin code excerpt |
| '\" # Opens an indented region with .RS, turns filling off and sets |
| '\" # tab stops every quarter inch up to 1i. |
| .de CS |
| .RS |
| .nf |
| .ta .25i .5i .75i 1i |
| .. |
| '\" # CE - end code excerpt |
| '\" # Turns filling back on and closes the indented region with .RE. |
| .de CE |
| .fi |
| .RE |
| .. |
| '\" # UL - print first argument underlined, then second argument |
| '\" # After the first argument is output, a rule of underscores is |
| '\" # drawn back to horizontal position 0; the second argument follows |
| '\" # without underlining. |
| .de UL |
| \\$1\l'|0\(ul'\\$2 |
| .. |
| .TH string n 8.1 Tcl "Tcl Built-In Commands" |
| .BS |
| '\" Note: do not modify the .SH NAME line immediately below! |
| .SH NAME |
| string \- Manipulate strings |
| .SH SYNOPSIS |
| \fBstring \fIoption arg \fR?\fIarg ...?\fR |
| .BE |
| |
| .SH DESCRIPTION |
| .PP |
| Performs one of several string operations, depending on \fIoption\fR. |
| The legal \fIoption\fRs (which may be abbreviated) are: |
| .TP |
| \fBstring bytelength \fIstring\fR |
| Returns a decimal string giving the number of bytes used to represent |
| \fIstring\fR in memory. Because UTF\-8 uses one to three bytes to |
| represent Unicode characters, the byte length will not be the same as |
| the character length in general. The cases where a script cares about |
| the byte length are rare. In almost all cases, you should use the |
| \fBstring length\fR operation (including determining the length of a |
| Tcl ByteArray object). Refer to the \fBTcl_NumUtfChars\fR manual |
| entry for more details on the UTF\-8 representation. |
| .TP |
| \fBstring compare\fR ?\fB\-nocase\fR? ?\fB\-length int\fR? \fIstring1 string2\fR |
| Perform a character-by-character comparison of strings \fIstring1\fR |
| and \fIstring2\fR. Returns \-1, 0, or 1, depending on whether |
| \fIstring1\fR is lexicographically less than, equal to, or greater |
| than \fIstring2\fR. If \fB\-length\fR is specified, then only the |
| first \fIlength\fR characters are used in the comparison. If |
| \fB\-length\fR is negative, it is ignored. If \fB\-nocase\fR is |
| specified, then the strings are compared in a case-insensitive manner. |
| .TP |
| \fBstring equal\fR ?\fB\-nocase\fR? ?\fB\-length int\fR? \fIstring1 string2\fR |
| Perform a character-by-character comparison of strings \fIstring1\fR |
| and \fIstring2\fR. Returns 1 if \fIstring1\fR and \fIstring2\fR are |
| identical, or 0 when not. If \fB\-length\fR is specified, then only |
| the first \fIlength\fR characters are used in the comparison. If |
| \fB\-length\fR is negative, it is ignored. If \fB\-nocase\fR is |
| specified, then the strings are compared in a case-insensitive manner. |
| .TP |
| \fBstring first \fIstring1 string2\fR ?\fIstartIndex\fR? |
| Search \fIstring2\fR for a sequence of characters that exactly match |
| the characters in \fIstring1\fR. If found, return the index of the |
| first character in the first such match within \fIstring2\fR. If not |
| found, return \-1. If \fIstartIndex\fR is specified (in any of the |
| forms accepted by the \fBindex\fR method), then the search is |
| constrained to start with the character in \fIstring2\fR specified by |
| the index. For example, |
| .RS |
| .CS |
| \fBstring first a 0a23456789abcdef 5\fR |
| .CE |
| will return \fB10\fR, but |
| .CS |
| \fBstring first a 0123456789abcdef 11\fR |
| .CE |
| will return \fB\-1\fR. |
| .RE |
| .TP |
| \fBstring index \fIstring charIndex\fR |
| Returns the \fIcharIndex\fR'th character of the \fIstring\fR argument. |
| A \fIcharIndex\fR of 0 corresponds to the first character of the |
| string. \fIcharIndex\fR may be specified as follows: |
| .RS |
| .IP \fIinteger\fR 10 |
| The char specified at this integral index. |
| .IP \fBend\fR 10 |
| The last char of the string. |
| .IP \fBend\-\fIinteger\fR 10 |
| The last char of the string minus the specified integer offset |
| (e.g. \fBend\-1\fR would refer to the "c" in "abcd"). |
| .PP |
| If \fIcharIndex\fR is less than 0 or greater than or equal to the |
| length of the string then an empty string is returned. |
| .RE |
| .TP |
| \fBstring is \fIclass\fR ?\fB\-strict\fR? ?\fB\-failindex \fIvarname\fR? \fIstring\fR |
| Returns 1 if \fIstring\fR is a valid member of the specified character |
| class, otherwise returns 0. If \fB\-strict\fR is specified, then an |
| empty string returns 0, otherwise an empty string will return 1 on |
| any class. If \fB\-failindex\fR is specified, then if the function |
| returns 0, the index in the string where the class was no longer valid |
| will be stored in the variable named \fIvarname\fR. The \fIvarname\fR |
| will not be set if the function returns 1. The following character |
| classes are recognized (the class name can be abbreviated): |
| .RS |
| .IP \fBalnum\fR 10 |
| Any Unicode alphabet or digit character. |
| .IP \fBalpha\fR 10 |
| Any Unicode alphabet character. |
| .IP \fBascii\fR 10 |
| Any character with a value less than \\u0080 (those that are in the |
| 7\-bit ascii range). |
| .IP \fBboolean\fR 10 |
| Any of the forms allowed to \fBTcl_GetBoolean\fR. |
| .IP \fBcontrol\fR 10 |
| Any Unicode control character. |
| .IP \fBdigit\fR 10 |
| Any Unicode digit character. Note that this includes characters |
| outside of the [0\-9] range. |
| .IP \fBdouble\fR 10 |
| Any of the valid forms for a double in Tcl, with optional surrounding |
| whitespace. In case of under/overflow in the value, 0 is returned and |
| the \fIvarname\fR will contain \-1. |
| .IP \fBfalse\fR 10 |
| Any of the forms allowed to \fBTcl_GetBoolean\fR where the value is |
| false. |
| .IP \fBgraph\fR 10 |
| Any Unicode printing character, except space. |
| .IP \fBinteger\fR 10 |
| Any of the valid forms for an integer in Tcl, with optional |
| surrounding whitespace. In case of under/overflow in the value, 0 is |
| returned and the \fIvarname\fR will contain \-1. |
| .IP \fBlower\fR 10 |
| Any Unicode lower case alphabet character. |
| .IP \fBprint\fR 10 |
| Any Unicode printing character, including space. |
| .IP \fBpunct\fR 10 |
| Any Unicode punctuation character. |
| .IP \fBspace\fR 10 |
| Any Unicode space character. |
| .IP \fBtrue\fR 10 |
| Any of the forms allowed to \fBTcl_GetBoolean\fR where the value is |
| true. |
| .IP \fBupper\fR 10 |
| Any upper case alphabet character in the Unicode character set. |
| .IP \fBwordchar\fR 10 |
| Any Unicode word character. That is, any alphanumeric character and |
| any Unicode connector punctuation character (e.g. underscore). |
| .IP \fBxdigit\fR 10 |
| Any hexadecimal digit character ([0\-9A\-Fa\-f]). |
| .PP |
| In the case of \fBboolean\fR, \fBtrue\fR and \fBfalse\fR, if the |
| function will return 0, then the \fIvarname\fR will always be set to |
| 0, due to the varied nature of a valid boolean value. |
| .RE |
| .TP |
| \fBstring last \fIstring1 string2\fR ?\fIlastIndex\fR? |
| Search \fIstring2\fR for a sequence of characters that exactly match |
| the characters in \fIstring1\fR. If found, return the index of the |
| first character in the last such match within \fIstring2\fR. If there |
| is no match, then return \-1. If \fIlastIndex\fR is specified (in any |
| of the forms accepted by the \fBindex\fR method), then only the |
| characters in \fIstring2\fR at or before the specified \fIlastIndex\fR |
| will be considered by the search. For example, |
| .RS |
| .CS |
| \fBstring last a 0a23456789abcdef 15\fR |
| .CE |
| will return \fB10\fR, but |
| .CS |
| \fBstring last a 0a23456789abcdef 9\fR |
| .CE |
| will return \fB1\fR. |
| .RE |
| .TP |
| \fBstring length \fIstring\fR |
| Returns a decimal string giving the number of characters in |
| \fIstring\fR. Note that this is not necessarily the same as the |
| number of bytes used to store the string. If the object is a |
| ByteArray object (such as those returned from reading a binary encoded |
| channel), then this will return the actual byte length of the object. |
| .TP |
| \fBstring map\fR ?\fB\-nocase\fR? \fIcharMap string\fR |
| Replaces characters in \fIstring\fR based on the key-value pairs in |
| \fIcharMap\fR. \fIcharMap\fR is a list of \fIkey value key value ...\fR |
| as in the form returned by \fBarray get\fR. Each instance of a |
| key in the string will be replaced with its corresponding value. If |
| \fB\-nocase\fR is specified, then matching is done without regard to |
| case differences. Both \fIkey\fR and \fIvalue\fR may be multiple |
| characters. Replacement is done in an ordered manner, so the key |
| appearing first in the list will be checked first, and so on. |
| \fIstring\fR is only iterated over once, so earlier key replacements |
| will have no effect for later key matches. For example, |
| .RS |
| .CS |
| \fBstring map {abc 1 ab 2 a 3 1 0} 1abcaababcabababc\fR |
| .CE |
| will return the string \fB01321221\fR. |
| .RE |
| .TP |
| \fBstring match\fR ?\fB\-nocase\fR? \fIpattern\fR \fIstring\fR |
| See if \fIpattern\fR matches \fIstring\fR; return 1 if it does, 0 if |
| it doesn't. If \fB\-nocase\fR is specified, then the pattern attempts |
| to match against the string in a case-insensitive manner. For the two |
| strings to match, their contents must be identical except that the |
| following special sequences may appear in \fIpattern\fR: |
| .RS |
| .IP \fB*\fR 10 |
| Matches any sequence of characters in \fIstring\fR, including a null |
| string. |
| .IP \fB?\fR 10 |
| Matches any single character in \fIstring\fR. |
| .IP \fB[\fIchars\fB]\fR 10 |
| Matches any character in the set given by \fIchars\fR. If a sequence |
| of the form \fIx\fB\-\fIy\fR appears in \fIchars\fR, then any |
| character between \fIx\fR and \fIy\fR, inclusive, will match. When |
| used with \fB\-nocase\fR, the end points of the range are converted to |
| lower case first. Whereas {[A\-z]} matches '_' when matching |
| case-sensitively ('_' falls between the 'Z' and 'a'), with |
| \fB\-nocase\fR this is considered like {[A\-Za\-z]} (and probably what |
| was meant in the first place). |
| .IP \fB\e\fIx\fR 10 |
| Matches the single character \fIx\fR. This provides a way of avoiding |
| the special interpretation of the characters \fB*?[]\e\fR in |
| \fIpattern\fR. |
| .RE |
| .TP |
| \fBstring range \fIstring first last\fR |
| Returns a range of consecutive characters from \fIstring\fR, starting |
| with the character whose index is \fIfirst\fR and ending with the |
| character whose index is \fIlast\fR. An index of 0 refers to the first |
| character of the string. \fIfirst\fR and \fIlast\fR may be specified |
| as for the \fBindex\fR method. If \fIfirst\fR is less than zero then |
| it is treated as if it were zero, and if \fIlast\fR is greater than or |
| equal to the length of the string then it is treated as if it were |
| \fBend\fR. If \fIfirst\fR is greater than \fIlast\fR then an empty |
| string is returned. |
| .TP |
| \fBstring repeat \fIstring count\fR |
| Returns \fIstring\fR repeated \fIcount\fR number of times. |
| .TP |
| \fBstring replace \fIstring first last\fR ?\fInewstring\fR? |
| Removes a range of consecutive characters from \fIstring\fR, starting |
| with the character whose index is \fIfirst\fR and ending with the |
| character whose index is \fIlast\fR. An index of 0 refers to the |
| first character of the string. \fIfirst\fR and \fIlast\fR may be |
| specified as for the \fBindex\fR method. If \fInewstring\fR is |
| specified, then it is placed in the removed character range. If |
| \fIfirst\fR is less than zero then it is treated as if it were zero, |
| and if \fIlast\fR is greater than or equal to the length of the string |
| then it is treated as if it were \fBend\fR. If \fIfirst\fR is greater |
| than \fIlast\fR or the length of the initial string, or \fIlast\fR is |
| less than 0, then the initial string is returned untouched. |
| .TP |
| \fBstring tolower \fIstring\fR ?\fIfirst\fR? ?\fIlast\fR? |
| Returns a value equal to \fIstring\fR except that all upper (or title) |
| case letters have been converted to lower case. If \fIfirst\fR is |
| specified, it refers to the first char index in the string to start |
| modifying. If \fIlast\fR is specified, it refers to the char index in |
| the string to stop at (inclusive). \fIfirst\fR and \fIlast\fR may be |
| specified as for the \fBindex\fR method. |
| .TP |
| \fBstring totitle \fIstring\fR ?\fIfirst\fR? ?\fIlast\fR? |
| Returns a value equal to \fIstring\fR except that the first character |
| in \fIstring\fR is converted to its Unicode title case variant (or |
| upper case if there is no title case variant) and the rest of the |
| string is converted to lower case. If \fIfirst\fR is specified, it |
| refers to the first char index in the string to start modifying. If |
| \fIlast\fR is specified, it refers to the char index in the string to |
| stop at (inclusive). \fIfirst\fR and \fIlast\fR may be specified as |
| for the \fBindex\fR method. |
| .TP |
| \fBstring toupper \fIstring\fR ?\fIfirst\fR? ?\fIlast\fR? |
| Returns a value equal to \fIstring\fR except that all lower (or title) |
| case letters have been converted to upper case. If \fIfirst\fR is |
| specified, it refers to the first char index in the string to start |
| modifying. If \fIlast\fR is specified, it refers to the char index in |
| the string to stop at (inclusive). \fIfirst\fR and \fIlast\fR may be |
| specified as for the \fBindex\fR method. |
| .TP |
| \fBstring trim \fIstring\fR ?\fIchars\fR? |
| Returns a value equal to \fIstring\fR except that any leading or |
| trailing characters from the set given by \fIchars\fR are removed. If |
| \fIchars\fR is not specified then white space is removed (spaces, |
| tabs, newlines, and carriage returns). |
| .TP |
| \fBstring trimleft \fIstring\fR ?\fIchars\fR? |
| Returns a value equal to \fIstring\fR except that any leading |
| characters from the set given by \fIchars\fR are removed. If |
| \fIchars\fR is not specified then white space is removed (spaces, |
| tabs, newlines, and carriage returns). |
| .TP |
| \fBstring trimright \fIstring\fR ?\fIchars\fR? |
| Returns a value equal to \fIstring\fR except that any trailing |
| characters from the set given by \fIchars\fR are removed. If |
| \fIchars\fR is not specified then white space is removed (spaces, |
| tabs, newlines, and carriage returns). |
| .TP |
| \fBstring wordend \fIstring charIndex\fR |
| Returns the index of the character just after the last one in the word |
| containing character \fIcharIndex\fR of \fIstring\fR. \fIcharIndex\fR |
| may be specified as for the \fBindex\fR method. A word is |
| considered to be any contiguous range of alphanumeric (Unicode letters |
| or decimal digits) or underscore (Unicode connector punctuation) |
| characters, or any single character other than these. |
| .TP |
| \fBstring wordstart \fIstring charIndex\fR |
| Returns the index of the first character in the word containing |
| character \fIcharIndex\fR of \fIstring\fR. \fIcharIndex\fR may be |
| specified as for the \fBindex\fR method. A word is considered to be any |
| contiguous range of alphanumeric (Unicode letters or decimal digits) |
| or underscore (Unicode connector punctuation) characters, or any |
| single character other than these. |
| |
| .SH "SEE ALSO" |
| expr(n), list(n) |
| |
| .SH KEYWORDS |
| case conversion, compare, index, match, pattern, string, word, equal, ctype |