Module unicode

This module provides support to handle the Unicode UTF-8 encoding.

Types

RuneImpl = int
  Source
Rune = distinct RuneImpl
type that can hold any Unicode character   Source
Rune16 = distinct int16
16-bit Unicode character   Source

Procs

proc `<=%`(a, b: Rune): bool {.raises: [], tags: [].}
  Source
proc `<%`(a, b: Rune): bool {.raises: [], tags: [].}
  Source
proc `==`(a, b: Rune): bool {.raises: [], tags: [].}
  Source
proc runeLen(s: string): int {.gcsafe, extern: "nuc$1", raises: [], tags: [].}
returns the number of Unicode characters of the string s.   Source
proc runeLenAt(s: string; i: Natural): int {.raises: [], tags: [].}
returns the number of bytes the rune starting at s[i] takes.   Source
proc validateUtf8(s: string): int {.raises: [], tags: [].}
returns the position of the invalid byte in s if the string s does not hold valid UTF-8 data. Otherwise -1 is returned.   Source
proc runeAt(s: string; i: Natural): Rune {.raises: [], tags: [].}
returns the Unicode character in s at byte index i.   Source
proc toUTF8(c: Rune): string {.gcsafe, extern: "nuc$1", raises: [], tags: [].}
converts a rune into its UTF8 representation   Source
proc `$`(rune: Rune): string {.raises: [], tags: [].}
converts a rune to a string   Source
proc `$`(runes: seq[Rune]): string {.raises: [], tags: [].}
converts a sequence of runes to a string   Source
proc toLower(c: Rune): Rune {.gcsafe, extern: "nuc$1", procvar, raises: [], 
                              tags: [].}
Converts c into lower case. This works for any Unicode character. If possible, prefer toLower over toUpper.   Source
proc toUpper(c: Rune): Rune {.gcsafe, extern: "nuc$1", procvar, raises: [], 
                              tags: [].}
Converts c into upper case. This works for any Unicode character. If possible, prefer toLower over toUpper.   Source
proc toTitle(c: Rune): Rune {.gcsafe, extern: "nuc$1", procvar, raises: [], 
                              tags: [].}
  Source
proc isLower(c: Rune): bool {.gcsafe, extern: "nuc$1", procvar, raises: [], 
                              tags: [].}
returns true iff c is a lower case Unicode character. If possible, prefer isLower over isUpper.   Source
proc isUpper(c: Rune): bool {.gcsafe, extern: "nuc$1", procvar, raises: [], 
                              tags: [].}
returns true iff c is an upper case Unicode character. If possible, prefer isLower over isUpper.   Source
proc isAlpha(c: Rune): bool {.gcsafe, extern: "nuc$1", procvar, raises: [], 
                              tags: [].}
returns true iff c is an alpha Unicode character (i.e. a letter)   Source
proc isTitle(c: Rune): bool {.gcsafe, extern: "nuc$1", procvar, raises: [], 
                              tags: [].}
  Source
proc isWhiteSpace(c: Rune): bool {.gcsafe, extern: "nuc$1", procvar, raises: [], 
                                   tags: [].}
returns true iff c is a Unicode whitespace character   Source
proc isCombining(c: Rune): bool {.gcsafe, extern: "nuc$1", procvar, raises: [], 
                                  tags: [].}
returns true iff c is a Unicode combining character   Source
proc cmpRunesIgnoreCase(a, b: string): int {.gcsafe, extern: "nuc$1", procvar, 
    raises: [], tags: [].}
compares two UTF8 strings and ignores the case. Returns:

0 iff a == b
< 0 iff a < b
> 0 iff a > b

  Source
proc reversed(s: string): string {.raises: [], tags: [].}
returns the reverse of s, interpreting it as Unicode characters. Unicode combining characters are correctly interpreted as well:
assert reversed("Reverse this!") == "!siht esreveR"
assert reversed("先秦兩漢") == "漢兩秦先"
assert reversed("as⃝df̅") == "f̅ds⃝a"
assert reversed("a⃞b⃞c⃞") == "c⃞b⃞a⃞"
  Source

Iterators

iterator runes(s: string): Rune {.raises: [], tags: [].}
iterates over each Unicode character of the string s.   Source

Templates

template fastRuneAt(s: string; i: int; result: expr; doInc = true)
Returns the Unicode character s[i] in result. If doInc == true, i is incremented by the number of bytes that have been processed.   Source