W3cubDocs

Module strutils

This module contains various string utility routines. See the module re for regular expression support. See the module pegs for PEG support. This module is available for the JavaScript target.

Imports

parseutils, math, algorithm

Types

CharSet = set[char]
FloatFormatMode = enum
  ffDefault,    ## use the shorter floating point notation
  ffDecimal,    ## use decimal floating point notation
  ffScientific  ## use scientific notation (using ``e`` character)

the different modes of floating point formatting
BinaryPrefixMode = enum
  bpIEC, bpColloquial

the different names for binary prefixes

Consts

Whitespace = {' ', '\x09', '\x0B', '\x0D', '\x0A', '\x0C'}

All the characters that count as whitespace.

Letters = {'A'..'Z', 'a'..'z'}

the set of letters

Digits = {'0'..'9'}

the set of digits

HexDigits = {'0'..'9', 'A'..'F', 'a'..'f'}

the set of hexadecimal digits

IdentChars = {'a'..'z', 'A'..'Z', '0'..'9', '_'}

the set of characters an identifier can consist of

IdentStartChars = {'a'..'z', 'A'..'Z', '_'}

the set of characters an identifier can start with

NewLines = {'\x0D', '\x0A'}

the set of characters a newline terminator can start with

AllChars = {'\0'..'\xFF'}

A set with all the possible characters.

Not very useful on its own, but you can use it to create inverted sets to make the find() proc find invalid characters in strings. Example:

let invalid = AllChars - Digits
doAssert "01234".find(invalid) == -1
doAssert "01A34".find(invalid) == 2

Procs

proc isAlphaAscii(c: char): bool {.noSideEffect, procvar, gcsafe, extern: "nsuIsAlphaAsciiChar", raises: [], tags: [].}

Checks whether or not c is alphabetical.

This checks a-z, A-Z ASCII characters only.

proc isAlphaNumeric(c: char): bool {.noSideEffect, procvar, gcsafe, extern: "nsuIsAlphaNumericChar", raises: [], tags: [].}

Checks whether or not c is alphanumeric.

This checks a-z, A-Z, 0-9 ASCII characters only.

proc isDigit(c: char): bool {.noSideEffect, procvar, gcsafe, extern: "nsuIsDigitChar", raises: [], tags: [].}

Checks whether or not c is a number.

This checks 0-9 ASCII characters only.

proc isSpaceAscii(c: char): bool {.noSideEffect, procvar, gcsafe, extern: "nsuIsSpaceAsciiChar", raises: [], tags: [].}

Checks whether or not c is a whitespace character.

proc isLowerAscii(c: char): bool {.noSideEffect, procvar, gcsafe, extern: "nsuIsLowerAsciiChar", raises: [], tags: [].}

Checks whether or not c is a lower case character.

This checks ASCII characters only.

proc isUpperAscii(c: char): bool {.noSideEffect, procvar, gcsafe, extern: "nsuIsUpperAsciiChar", raises: [], tags: [].}

Checks whether or not c is an upper case character.

This checks ASCII characters only.

proc isAlphaAscii(s: string): bool {.noSideEffect, procvar, gcsafe, extern: "nsuIsAlphaAsciiStr", raises: [], tags: [].}

Checks whether or not s is alphabetical.

This checks a-z, A-Z ASCII characters only. Returns true if all characters in s are alphabetic and there is at least one character in s.

proc isAlphaNumeric(s: string): bool {.noSideEffect, procvar, gcsafe, extern: "nsuIsAlphaNumericStr", raises: [], tags: [].}

Checks whether or not s is alphanumeric.

This checks a-z, A-Z, 0-9 ASCII characters only. Returns true if all characters in s are alphanumeric and there is at least one character in s.

proc isDigit(s: string): bool {.noSideEffect, procvar, gcsafe, extern: "nsuIsDigitStr", raises: [], tags: [].}

Checks whether or not s is a numeric value.

This checks 0-9 ASCII characters only. Returns true if all characters in s are numeric and there is at least one character in s.

proc isSpaceAscii(s: string): bool {.noSideEffect, procvar, gcsafe, extern: "nsuIsSpaceAsciiStr", raises: [], tags: [].}

Checks whether or not s is completely whitespace.

Returns true if all characters in s are whitespace characters and there is at least one character in s.

proc isLowerAscii(s: string): bool {.noSideEffect, procvar, gcsafe, extern: "nsuIsLowerAsciiStr", raises: [], tags: [].}

Checks whether or not s contains all lower case characters.

This checks ASCII characters only. Returns true if all characters in s are lower case and there is at least one character in s.

proc isUpperAscii(s: string): bool {.noSideEffect, procvar, gcsafe, extern: "nsuIsUpperAsciiStr", raises: [], tags: [].}

Checks whether or not s contains all upper case characters.

This checks ASCII characters only. Returns true if all characters in s are upper case and there is at least one character in s.

proc toLowerAscii(c: char): char {.noSideEffect, procvar, gcsafe, extern: "nsuToLowerAsciiChar", raises: [], tags: [].}

Converts c into lower case.

This works only for the letters A-Z. See unicode.toLower for a version that works for any Unicode character.

proc toLowerAscii(s: string): string {.noSideEffect, procvar, gcsafe, extern: "nsuToLowerAsciiStr", raises: [], tags: [].}

Converts s into lower case.

This works only for the letters A-Z. See unicode.toLower for a version that works for any Unicode character.

proc toUpperAscii(c: char): char {.noSideEffect, procvar, gcsafe, extern: "nsuToUpperAsciiChar", raises: [], tags: [].}

Converts c into upper case.

This works only for the letters A-Z. See unicode.toUpper for a version that works for any Unicode character.

proc toUpperAscii(s: string): string {.noSideEffect, procvar, gcsafe, extern: "nsuToUpperAsciiStr", raises: [], tags: [].}

Converts s into upper case.

This works only for the letters A-Z. See unicode.toUpper for a version that works for any Unicode character.

proc capitalizeAscii(s: string): string {.noSideEffect, procvar, gcsafe, extern: "nsuCapitalizeAscii", raises: [], tags: [].}

Converts the first character of s into upper case.

This works only for the letters A-Z.

proc isSpace(c: char): bool {.noSideEffect, procvar, gcsafe, deprecated, extern: "nsuIsSpaceChar", raises: [], tags: [].}

Checks whether or not c is a whitespace character.

Deprecated since version 0.15.0: use isSpaceAscii instead.

proc isLower(c: char): bool {.noSideEffect, procvar, gcsafe, deprecated, extern: "nsuIsLowerChar", raises: [], tags: [].}

Checks whether or not c is a lower case character.

This checks ASCII characters only.

Deprecated since version 0.15.0: use isLowerAscii instead.

proc isUpper(c: char): bool {.noSideEffect, procvar, gcsafe, deprecated, extern: "nsuIsUpperChar", raises: [], tags: [].}

Checks whether or not c is an upper case character.

This checks ASCII characters only.

Deprecated since version 0.15.0: use isUpperAscii instead.

proc isAlpha(c: char): bool {.noSideEffect, procvar, gcsafe, deprecated, extern: "nsuIsAlphaChar", raises: [], tags: [].}

Checks whether or not c is alphabetical.

This checks a-z, A-Z ASCII characters only.

Deprecated since version 0.15.0: use isAlphaAscii instead.

proc isAlpha(s: string): bool {.noSideEffect, procvar, gcsafe, deprecated, extern: "nsuIsAlphaStr", raises: [], tags: [].}

Checks whether or not s is alphabetical.

This checks a-z, A-Z ASCII characters only. Returns true if all characters in s are alphabetic and there is at least one character in s.

Deprecated since version 0.15.0: use isAlphaAscii instead.

proc isSpace(s: string): bool {.noSideEffect, procvar, gcsafe, deprecated, extern: "nsuIsSpaceStr", raises: [], tags: [].}

Checks whether or not s is completely whitespace.

Returns true if all characters in s are whitespace characters and there is at least one character in s.

Deprecated since version 0.15.0: use isSpaceAscii instead.

proc isLower(s: string): bool {.noSideEffect, procvar, gcsafe, deprecated, extern: "nsuIsLowerStr", raises: [], tags: [].}

Checks whether or not s contains all lower case characters.

This checks ASCII characters only. Returns true if all characters in s are lower case and there is at least one character in s.

Deprecated since version 0.15.0: use isLowerAscii instead.

proc isUpper(s: string): bool {.noSideEffect, procvar, gcsafe, deprecated, extern: "nsuIsUpperStr", raises: [], tags: [].}

Checks whether or not s contains all upper case characters.

This checks ASCII characters only. Returns true if all characters in s are upper case and there is at least one character in s.

Deprecated since version 0.15.0: use isUpperAscii instead.

proc toLower(c: char): char {.noSideEffect, procvar, gcsafe, deprecated, extern: "nsuToLowerChar", raises: [], tags: [].}

Converts c into lower case.

This works only for the letters A-Z. See unicode.toLower for a version that works for any Unicode character.

Deprecated since version 0.15.0: use toLowerAscii instead.

proc toLower(s: string): string {.noSideEffect, procvar, gcsafe, deprecated, extern: "nsuToLowerStr", raises: [], tags: [].}

Converts s into lower case.

This works only for the letters A-Z. See unicode.toLower for a version that works for any Unicode character.

Deprecated since version 0.15.0: use toLowerAscii instead.

proc toUpper(c: char): char {.noSideEffect, procvar, gcsafe, deprecated, extern: "nsuToUpperChar", raises: [], tags: [].}

Converts c into upper case.

This works only for the letters A-Z. See unicode.toUpper for a version that works for any Unicode character.

Deprecated since version 0.15.0: use toUpperAscii instead.

proc toUpper(s: string): string {.noSideEffect, procvar, gcsafe, deprecated, extern: "nsuToUpperStr", raises: [], tags: [].}

Converts s into upper case.

This works only for the letters A-Z. See unicode.toUpper for a version that works for any Unicode character.

Deprecated since version 0.15.0: use toUpperAscii instead.

proc capitalize(s: string): string {.noSideEffect, procvar, gcsafe, deprecated, extern: "nsuCapitalize", raises: [], tags: [].}

Converts the first character of s into upper case.

This works only for the letters A-Z.

Deprecated since version 0.15.0: use capitalizeAscii instead.

proc normalize(s: string): string {.noSideEffect, procvar, gcsafe, extern: "nsuNormalize", raises: [], tags: [].}

Normalizes the string s.

That means to convert it to lower case and remove any '_'. This is needed for Nim identifiers for example.

proc cmpIgnoreCase(a, b: string): int {.noSideEffect, gcsafe, extern: "nsuCmpIgnoreCase", procvar, raises: [], tags: [].}

Compares two strings in a case insensitive manner. Returns:

0 iff a == b
< 0 iff a < b
> 0 iff a > b

proc cmpIgnoreStyle(a, b: string): int {.noSideEffect, gcsafe, extern: "nsuCmpIgnoreStyle", procvar, raises: [], tags: [].}

Compares two strings normalized (i.e. case and underscores do not matter). Returns:

0 iff a == b
< 0 iff a < b
> 0 iff a > b

proc strip(s: string; leading = true; trailing = true; chars: set[char] = Whitespace): string {. noSideEffect, gcsafe, extern: "nsuStrip", raises: [], tags: [].}

Strips chars from s and returns the resulting string.

If leading is true, leading chars are stripped. If trailing is true, trailing chars are stripped.

proc toOctal(c: char): string {.noSideEffect, gcsafe, extern: "nsuToOctal", raises: [], tags: [].}

Converts a character c to its octal representation.

The resulting string may not have a leading zero. Its length is always exactly 3.

proc isNilOrEmpty(s: string): bool {.noSideEffect, procvar, gcsafe, extern: "nsuIsNilOrEmpty", raises: [], tags: [].}

Checks if s is nil or empty.

proc isNilOrWhitespace(s: string): bool {.noSideEffect, procvar, gcsafe, extern: "nsuIsNilOrWhitespace", raises: [], tags: [].}

Checks if s is nil or consists entirely of whitespace characters.

proc splitWhitespace(s: string): seq[string] {.noSideEffect, gcsafe, extern: "nsuSplitWhitespace", raises: [], tags: [].}

The same as the splitWhitespace iterator, but is a proc that returns a sequence of substrings.

proc splitLines(s: string): seq[string] {.noSideEffect, gcsafe, extern: "nsuSplitLines", raises: [], tags: [].}

The same as the splitLines iterator, but is a proc that returns a sequence of substrings.

proc countLines(s: string): int {.noSideEffect, gcsafe, extern: "nsuCountLines", raises: [], tags: [].}

Returns the number of lines in the string s.

This is the same as len(splitLines(s)), but much more efficient because it doesn't modify the string or create temporary objects. Every character literal newline combination (CR, LF, CR-LF) is supported.

In this context, a line is any string separated by a newline combination. A line can be an empty string.

proc split(s: string; seps: set[char] = Whitespace; maxsplit: int = - 1): seq[string] {. noSideEffect, gcsafe, extern: "nsuSplitCharSet", raises: [], tags: [].}

The same as the split iterator, but is a proc that returns a sequence of substrings.

proc split(s: string; sep: char; maxsplit: int = - 1): seq[string] {.noSideEffect, gcsafe, extern: "nsuSplitChar", raises: [], tags: [].}

The same as the split iterator, but is a proc that returns a sequence of substrings.

proc split(s: string; sep: string; maxsplit: int = - 1): seq[string] {.noSideEffect, gcsafe, extern: "nsuSplitString", raises: [AssertionError], tags: [].}

Splits the string s into substrings using a string separator.

Substrings are separated by the string sep. This is a wrapper around the split iterator.

proc rsplit(s: string; seps: set[char] = Whitespace; maxsplit: int = - 1): seq[string] {. noSideEffect, gcsafe, extern: "nsuRSplitCharSet", raises: [], tags: [].}

The same as the rsplit iterator, but is a proc that returns a sequence of substrings.

A possible common use case for rsplit is path manipulation, particularly on systems that don't use a common delimiter.

For example, if a system had # as a delimiter, you could do the following to get the tail of the path:

var tailSplit = rsplit("Root#Object#Method#Index", {'#'}, maxsplit=1)

Results in tailSplit containing:

@["Root#Object#Method", "Index"]

proc rsplit(s: string; sep: char; maxsplit: int = - 1): seq[string] {.noSideEffect, gcsafe, extern: "nsuRSplitChar", raises: [], tags: [].}

The same as the rsplit iterator, but is a proc that returns a sequence of substrings.

A possible common use case for rsplit is path manipulation, particularly on systems that don't use a common delimiter.

For example, if a system had # as a delimiter, you could do the following to get the tail of the path:

var tailSplit = rsplit("Root#Object#Method#Index", '#', maxsplit=1)

Results in tailSplit containing:

@["Root#Object#Method", "Index"]

proc rsplit(s: string; sep: string; maxsplit: int = - 1): seq[string] {.noSideEffect, gcsafe, extern: "nsuRSplitString", raises: [], tags: [].}

The same as the rsplit iterator, but is a proc that returns a sequence of substrings.

A possible common use case for rsplit is path manipulation, particularly on systems that don't use a common delimiter.

For example, if a system had # as a delimiter, you could do the following to get the tail of the path:

var tailSplit = rsplit("Root#Object#Method#Index", "#", maxsplit=1)

Results in tailSplit containing:

@["Root#Object#Method", "Index"]

proc toHex(x: BiggestInt; len: Positive): string {.noSideEffect, gcsafe, extern: "nsuToHex", raises: [], tags: [].}

Converts x to its hexadecimal representation.

The resulting string will be exactly len characters long. No prefix like 0x is generated. x is treated as an unsigned value.

proc toHex[T](x: T): string

Shortcut for toHex(x, T.sizeOf * 2)

proc intToStr(x: int; minchars: Positive = 1): string {.noSideEffect, gcsafe, extern: "nsuIntToStr", raises: [], tags: [].}

Converts x to its decimal representation.

The resulting string will be at least minchars characters long. This is achieved by adding leading zeros.

proc parseInt(s: string): int {.noSideEffect, procvar, gcsafe, extern: "nsuParseInt", raises: [OverflowError, ValueError], tags: [].}

Parses a decimal integer value contained in s.

If s is not a valid integer, ValueError is raised.

proc parseBiggestInt(s: string): BiggestInt {.noSideEffect, procvar, gcsafe, extern: "nsuParseBiggestInt", raises: [ValueError], tags: [].}

Parses a decimal integer value contained in s.

If s is not a valid integer, ValueError is raised.

proc parseUInt(s: string): uint {.noSideEffect, procvar, gcsafe, extern: "nsuParseUInt", raises: [ValueError], tags: [].}

Parses a decimal unsigned integer value contained in s.

If s is not a valid integer, ValueError is raised.

proc parseBiggestUInt(s: string): BiggestUInt {.noSideEffect, procvar, gcsafe, extern: "nsuParseBiggestUInt", raises: [ValueError], tags: [].}

Parses a decimal unsigned integer value contained in s.

If s is not a valid integer, ValueError is raised.

proc parseFloat(s: string): float {.noSideEffect, procvar, gcsafe, extern: "nsuParseFloat", raises: [ValueError], tags: [].}

Parses a decimal floating point value contained in s. If s is not a valid floating point number, ValueError is raised. NAN, INF, -INF are also supported (case insensitive comparison).

proc parseHexInt(s: string): int {.noSideEffect, procvar, gcsafe, extern: "nsuParseHexInt", raises: [ValueError], tags: [].}

Parses a hexadecimal integer value contained in s.

If s is not a valid integer, ValueError is raised. s can have one of the following optional prefixes: 0x, 0X, #. Underscores within s are ignored.

proc parseBool(s: string): bool {.raises: [ValueError], tags: [].}

Parses a value into a bool.

If s is one of the following values: y, yes, true, 1, on, then returns true. If s is one of the following values: n, no, false, 0, off, then returns false. If s is something else a ValueError exception is raised.

proc parseEnum[T: enum](s: string): T

Parses an enum T.

Raises ValueError for an invalid value in s. The comparison is done in a style insensitive way.

proc parseEnum[T: enum](s: string; default: T): T

Parses an enum T.

Uses default for an invalid value in s. The comparison is done in a style insensitive way.

proc repeat(c: char; count: Natural): string {.noSideEffect, gcsafe, extern: "nsuRepeatChar", raises: [], tags: [].}

Returns a string of length count consisting only of the character c. You can use this proc to left align strings. Example:

proc tabexpand(indent: int, text: string, tabsize: int = 4) =
  echo '\t'.repeat(indent div tabsize), ' '.repeat(indent mod tabsize),
      text

tabexpand(4, "At four")
tabexpand(5, "At five")
tabexpand(6, "At six")

proc repeat(s: string; n: Natural): string {.noSideEffect, gcsafe, extern: "nsuRepeatStr", raises: [], tags: [].}

Returns string s concatenated n times. Example:

echo "+++ STOP ".repeat(4), "+++"

proc repeatChar(count: Natural; c: char = ' '): string {.deprecated, raises: [], tags: [].}

deprecated: use repeat() or spaces()

proc repeatStr(count: Natural; s: string): string {.deprecated, raises: [], tags: [].}

deprecated: use repeat(string, count) or string.repeat(count)

proc align(s: string; count: Natural; padding = ' '): string {.noSideEffect, gcsafe, extern: "nsuAlignString", raises: [], tags: [].}

Aligns a string s with padding, so that it is of length count.

padding characters (by default spaces) are added before s, resulting in right alignment. If s.len >= count, no padding is added and s is returned unchanged. If you need to left align a string, use the repeat proc. Example:

assert align("abc", 4) == " abc"
assert align("a", 0) == "a"
assert align("1232", 6) == "  1232"
assert align("1232", 6, '#') == "##1232"

proc wordWrap(s: string; maxLineWidth = 80; splitLongWords = true; seps: set[char] = Whitespace; newLine = "\x0D\x0A"): string {.noSideEffect, gcsafe, extern: "nsuWordWrap", raises: [], tags: [].}

Word wraps s.

proc indent(s: string; count: Natural; padding: string = " "): string {.noSideEffect, gcsafe, extern: "nsuIndent", raises: [], tags: [].}

Indents each line in s by count amount of padding.

Note: This does not preserve the new line characters used in s.

proc unindent(s: string; count: Natural; padding: string = " "): string {.noSideEffect, gcsafe, extern: "nsuUnindent", raises: [], tags: [].}

Unindents each line in s by count amount of padding.

Note: This does not preserve the new line characters used in s.

proc unindent(s: string): string {.noSideEffect, gcsafe, extern: "nsuUnindentAll", raises: [], tags: [].}

Removes all indentation composed of whitespace from each line in s.

For example:

const x = """
  Hello
  There
""".unindent()

doAssert x == "Hello\nThere\n"

proc startsWith(s, prefix: string): bool {.noSideEffect, gcsafe, extern: "nsuStartsWith", raises: [], tags: [].}

Returns true iff s starts with prefix.

If prefix == "" true is returned.

proc startsWith(s: string; prefix: char): bool {.noSideEffect, inline, raises: [], tags: [].}

Returns true iff s starts with prefix.

proc endsWith(s, suffix: string): bool {.noSideEffect, gcsafe, extern: "nsuEndsWith", raises: [], tags: [].}

Returns true iff s ends with suffix.

If suffix == "" true is returned.

proc endsWith(s: string; suffix: char): bool {.noSideEffect, inline, raises: [], tags: [].}

Returns true iff s ends with suffix.

proc continuesWith(s, substr: string; start: Natural): bool {.noSideEffect, gcsafe, extern: "nsuContinuesWith", raises: [], tags: [].}

Returns true iff s continues with substr at position start.

If substr == "" true is returned.

proc addSep(dest: var string; sep = ", "; startLen: Natural = 0) {.noSideEffect, inline, raises: [], tags: [].}

Adds a separator to dest only if its length is bigger than startLen.

A shorthand for:

if dest.len > startLen: add(dest, sep)

This is often useful for generating some code where the items need to be separated by sep. sep is only added if dest is longer than startLen. The following example creates a string describing an array of integers:

var arr = "["
for x in items([2, 3, 5, 7, 11]):
  addSep(arr, startLen=len("["))
  add(arr, $x)
add(arr, "]")

proc allCharsInSet(s: string; theSet: set[char]): bool {.raises: [], tags: [].}

Returns true iff each character of s is in the set theSet.

proc abbrev(s: string; possibilities: openArray[string]): int {.raises: [], tags: [].}

Returns the index of the first item in possibilities if not ambiguous.

Returns -1 if no item has been found and -2 if multiple items match.

proc join(a: openArray[string]; sep: string = ""): string {.noSideEffect, gcsafe, extern: "nsuJoinSep", raises: [], tags: [].}

Concatenates all strings in a, separating them with sep.

proc join[T: not string](a: openArray[T]; sep: string = ""): string {.noSideEffect, gcsafe.}

Converts all elements in a to strings using $ and concatenates them with sep.

proc find(s, sub: string; start: Natural = 0; last: Natural = 0): int {.noSideEffect, gcsafe, extern: "nsuFindStr", raises: [], tags: [].}

Searches for sub in s inside the range start..last. If last is unspecified, it defaults to s.high.