# gobyexample/third_party/pygments/tests/examplefiles/string.jl

## core string functions ##

# Abstract interface: every concrete String type has to provide its own
# length(s) and next(s, i::Int). These fallbacks only report the omission.
function length(s::String)
    error("you must implement length(",typeof(s),")")
end

function next(s::String, i::Int)
    error("you must implement next(",typeof(s),",Int)")
end

# Directly indexable strings: the character at i is s[i] and the index
# that follows is i+1.
function next(s::DirectIndexString, i::Int)
    return (s[i],i+1)
end

# Any other Integer index is converted to Int and dispatched again.
function next(s::String, i::Integer)
    return next(s,int(i))
end

## generic supplied functions ##

# Iteration protocol derived from length/next: traversal begins at index 1
# and is finished once the index exceeds length(s).
function start(s::String)
    return 1
end

function done(s::String,i)
    return i > length(s)
end

# A string is empty when iteration is already finished at the start index.
function isempty(s::String)
    return done(s,start(s))
end

# Indexing. For an Int index, s[i] is the character component of next(s,i).
function ref(s::String, i::Int)
    return next(s,i)[1]
end

# Other Integer indices are converted with int and indexed again.
function ref(s::String, i::Integer)
    return s[int(i)]
end

# Real indices are rounded with iround before indexing.
function ref(s::String, x::Real)
    return s[iround(x)]
end

# Ranges over any Integer type are rebuilt with Int endpoints.
function ref{T<:Integer}(s::String, r::Range1{T})
    return s[int(first(r)):int(last(r))]
end

# A string becomes a symbol through its cstring form.
function symbol(s::String)
    return symbol(cstring(s))
end

# string() of a string is the string itself.
function string(s::String)
    return s
end

# Printing a string prints its characters one after another.
function print(s::String)
    for c in s
        print(c)
    end
end

# Printing several values prints each of them in order.
function print(x...)
    for item in x
        print(item)
    end
end

# println prints its arguments followed by a newline character.
function println(args...)
    print(args..., '\n')
end

# show displays a string in its quoted form.
function show(s::String)
    print_quoted(s)
end

# Operators on strings: `*` concatenates any number of strings by
# forwarding to strcat, and `s^r` repeats s r times by forwarding to repeat.
(*)(s::String...) = strcat(s...)
(^)(s::String, r::Integer) = repeat(s,r)

# A string is treated as one-dimensional: its size is a 1-tuple holding
# length(s).
function size(s::String)
    return (length(s),)
end

# Only dimension 1 exists; asking for any other dimension is an error.
function size(s::String, d::Integer)
    if d == 1
        return length(s)
    end
    error("in size: dimension ",d," out of range")
end

# strlen counts characters. For directly indexable strings this is the
# same number as length(s).
function strlen(s::DirectIndexString)
    return length(s)
end

# Generic strings: step through the string with next and count the steps.
function strlen(s::String)
    nchars = 0
    pos = start(s)
    while !done(s,pos)
        ch, pos = next(s,pos)
        nchars += 1
    end
    return nchars
end

# isvalid(s,i) reports whether i can be used as an index into s.
# Directly indexable strings accept every index from start(s) to length(s).
isvalid(s::DirectIndexString, i::Integer) = (start(s) <= i <= length(s))
# Generic strings: i is valid exactly when next(s,i) completes without
# throwing. Any error raised by next is reported as "not valid".
function isvalid(s::String, i::Integer)
    try
        next(s,i)
        true
    catch
        false
    end
end

# Index arithmetic for directly indexable strings: neighbouring indices are
# one step away and thisind is the identity.
function prevind(s::DirectIndexString, i::Integer)
    return i-1
end

function thisind(s::DirectIndexString, i::Integer)
    return i
end

function nextind(s::DirectIndexString, i::Integer)
    return i+1
end

# Generic strings: snap i to a valid index, move one position back, then
# snap to a valid index again.
function prevind(s::String, i::Integer)
    cur = thisind(s,i)
    return thisind(s,cur-1)
end

# Return the largest valid index that is <= i, or 0 when no such index
# exists.
function thisind(s::String, i::Integer)
    for idx = i:-1:1
        if !isvalid(s,idx)
            continue
        end
        return idx
    end
    return 0 # out of range
end

# Return the smallest valid index that is > i, or length(s)+1 when no such
# index exists.
function nextind(s::String, i::Integer)
    n = length(s)
    for idx = i+1:n
        if !isvalid(s,idx)
            continue
        end
        return idx
    end
    return n+1 # out of range
end

# For directly indexable strings, index and character position are the
# same number, so both conversions return their argument.
function ind2chr(s::DirectIndexString, i::Integer)
    return i
end

function chr2ind(s::DirectIndexString, i::Integer)
    return i
end

# Convert the index i into a character position (1 for the first
# character) by walking the string from its start.
function ind2chr(s::String, i::Integer)
    s[i] # throws error if invalid
    nchars = 1
    pos = start(s)
    while true
        ch, after = next(s,pos)
        # stop at the first character whose index has reached i
        if pos >= i
            return nchars
        end
        nchars += 1
        pos = after
    end
end

# Convert the character position i (1 for the first character) into the
# index at which that character starts. Positions below 1 are returned
# unchanged.
function chr2ind(s::String, i::Integer)
    if i < 1
        return i
    end
    nchars = 1
    pos = start(s)
    while true
        ch, after = next(s,pos)
        if nchars == i
            return pos
        end
        nchars += 1
        pos = after
    end
end

# Search s for the character c, examining only indices strictly after i.
# Returns the index at which c is found, or 0 if it does not occur there.
function strchr(s::String, c::Char, i::Integer)
    i = nextind(s,i) # first candidate is the valid index following i
    while !done(s,i)
        d, j = next(s,i)
        if c == d
            return i
        end
        i = j
    end
    return 0
end

# Search the whole string. The three-argument method skips index i itself,
# so the search begins one position before start(s); passing start(s)
# would never examine the first character.
strchr(s::String, c::Char) = strchr(s, c, start(s)-1)

# True when c occurs anywhere in s.
contains(s::String, c::Char) = (strchr(s,c)!=0)

# Collect the characters of s into a freshly allocated Char array whose
# size is strlen(s).
function chars(s::String)
    out = Array(Char,strlen(s))
    n = 0
    for c in s
        n += 1
        out[n] = c
    end
    return out
end

# Lexicographic three-way comparison of two strings, character by
# character. Returns -1 if a sorts before b, +1 if a sorts after b and 0 if
# they are equal. A string that is a proper prefix of the other sorts first.
function cmp(a::String, b::String)
    i = start(a)
    j = start(b)
    # Each string is tracked with its own index (i for a, j for b); the
    # termination test for b therefore has to use j, not i.
    while !done(a,i) && !done(b,j)
        c, i = next(a,i)
        d, j = next(b,j)
        if c != d
            return c < d ? -1 : +1
        end
    end
    # Every compared character matched: the string that ran out first is
    # the smaller one.
    done(a,i) && !done(b,j) ? -1 :
    !done(a,i) && done(b,j) ? +1 : 0
end

# Equality and ordering of strings are both defined through cmp.
function isequal(a::String, b::String)
    return cmp(a,b) == 0
end

function isless(a::String, b::String)
    return cmp(a,b) < 0
end

# faster comparisons for byte strings

# Byte strings are compared through their underlying byte arrays.
function cmp(a::ByteString, b::ByteString)
    return lexcmp(a.data, b.data)
end

# Byte strings of different length cannot be equal, so the lengths are
# checked before the contents are compared.
function isequal(a::ByteString, b::ByteString)
    return length(a)==length(b) && cmp(a,b)==0
end

## character column width function ##

# Column width of a single character as returned by the C function
# wcwidth; a negative result is replaced by 0.
function charwidth(c::Char)
    w = int(ccall(:wcwidth, Int32, (Char,), c))
    return max(0,w)
end

# Width of a generic string: the sum of the widths of its characters.
function strwidth(s::String)
    total = 0
    for c in s
        total += charwidth(c)
    end
    return total
end

# Byte strings pass their data to the C routine u8_strwidth.
function strwidth(s::ByteString)
    return ccall(:u8_strwidth, Int, (Ptr{Uint8},), s.data)
end
# TODO: implement and use u8_strnwidth that takes a length argument

## generic string uses only length and next ##

# Wrapper that exposes another string only through length and next, both
# of which are forwarded to the wrapped string.
type GenericString <: String
    string::String
end

function length(s::GenericString)
    return length(s.string)
end

function next(s::GenericString, i::Int)
    return next(s.string, i)
end

## plain old character arrays ##

# A string stored as a plain array of characters.
type CharString <: String
    # the characters of the string, one array element per character
    chars::Array{Char,1}

    # wrap an existing character array (the array itself is stored)
    CharString(a::Array{Char,1}) = new(a)
    # build the array from individual Char arguments
    CharString(c::Char...) = new([ c[i] | i=1:length(c) ])
end
# Any other arguments are converted with char first.
CharString(x...) = CharString(map(char,x)...)

# Indexing maps directly onto the array: the character at i is chars[i]
# and the next index is i+1, so strlen equals length.
next(s::CharString, i::Int) = (s.chars[i], i+1)
length(s::CharString) = length(s.chars)
strlen(s::CharString) = length(s)

# string() applied to one or more characters produces a CharString.
string(c::Char) = CharString(c)
string(c::Char, x::Char...) = CharString(c, x...)

## substrings reference original strings ##

# A view onto the index range i..j of another string; no data is copied.
type SubString <: String
    string::String   # the string being referenced
    offset::Int      # added to a substring index to get an index into `string`
    length::Int      # number of indices covered by the substring

    # indices i..j of s: offset is i-1 and length is j-i+1
    SubString(s::String, i::Int, j::Int) = new(s, i-1, j-i+1)
    # a substring of a substring refers to the innermost original string,
    # with the offsets accumulated
    SubString(s::SubString, i::Int, j::Int) =
        new(s.string, i-1+s.offset, j-i+1)
end
# Other Integer bounds are converted to Int first.
SubString(s::String, i::Integer, j::Integer) = SubString(s, int(i), int(j))

# Read the character at substring index i. The index is checked against
# the substring's own bounds, shifted by the offset to read from the
# underlying string, and the returned next index is shifted back.
function next(s::SubString, i::Int)
    if !(1 <= i <= s.length)
        error("string index out of bounds")
    end
    ch, after = next(s.string, i+s.offset)
    return ch, after-s.offset
end

# The length of a substring is the stored length field.
function length(s::SubString)
    return s.length
end
# TODO: strlen(s::SubString) = ??
# default implementation will work but it's slow
# can this be delegated efficiently somehow?
# that may require additional string interfaces

# Slicing with an Int range yields a SubString that refers to s. The range
# must lie within 1..length(s).
function ref(s::String, r::Range1{Int})
    lo = first(r)
    hi = last(r)
    if lo < 1 || length(s) < hi
        error("in substring slice: index out of range")
    end
    return SubString(s, lo, hi)
end

## efficient representation of repeated strings ##

# A string repeated a number of times, stored as the base string plus the
# repetition count.
type RepString <: String
    string::String
    repeat::Integer
end

# Both measures of size scale with the repetition count.
function length(s::RepString)
    return length(s.string)*s.repeat
end

function strlen(s::RepString)
    return strlen(s.string)*s.repeat
end

# Read the character at index i of the repeated string by mapping i onto
# the base string with mod1, then translating the next index back.
function next(s::RepString, i::Int)
    if i < 1 || i > length(s)
        error("string index out of bounds")
    end
    inner = mod1(i,length(s.string))
    ch, after = next(s.string, inner)
    # advance i by as many positions as the inner index advanced
    return ch, after-inner+i
end

# Repeat s r times. Negative counts are an error, zero gives the empty
# string, one gives s itself, and larger counts give a RepString.
function repeat(s::String, r::Integer)
    if r < 0
        error("can't repeat a string ",r," times")
    elseif r == 0
        return ""
    elseif r == 1
        return s
    end
    return RepString(s,r)
end

## reversed strings without data movement ##

# A reversed view of another string. Index i of the reversed string
# corresponds to index n-i+1 of the wrapped string, where n is its length.
type RevString <: String
    string::String
end

function length(s::RevString)
    return length(s.string)
end

function strlen(s::RevString)
    return strlen(s.string)
end

# Iteration starts at the reversed index that corresponds to the last
# valid index of the wrapped string.
function start(s::RevString)
    n = length(s)
    return n-thisind(s.string,n)+1
end

# Read the wrapped string at the mirrored index; the next reversed index
# corresponds to the valid wrapped index found at or before j-1.
function next(s::RevString, i::Int)
    n = length(s)
    j = n-i+1
    ch = s.string[j]
    after = n-thisind(s.string,j-1)+1
    return (ch, after)
end

# Reversing wraps a string; reversing a RevString unwraps it again.
function reverse(s::String)
    return RevString(s)
end

function reverse(s::RevString)
    return s.string
end

## ropes for efficient concatenation, etc. ##

# Idea: instead of this standard binary tree structure,
# how about we keep an array of substrings, with an
# offset array. We can do binary search on the offset
# array so we get O(log(n)) indexing time still, but we
# can compute the offsets lazily and avoid all the
# futzing around while the string is being constructed.

# Binary-tree representation of a concatenation: a rope is `head`
# followed by `tail`, with the tree depth and the total length cached.
type RopeString <: String
    head::String    # left part
    tail::String    # right part
    depth::Int32    # depth of this node (non-rope strings count as depth 0)
    length::Int     # length(head) + length(tail)

    # rope + rope: if the left rope is head-heavy, re-associate as
    # h.head * (h.tail * t); otherwise create a new node above both
    RopeString(h::RopeString, t::RopeString) =
        depth(h.tail) + depth(t) < depth(h.head) ?
            RopeString(h.head, RopeString(h.tail, t)) :
            new(h, t, max(h.depth,t.depth)+1, length(h)+length(t))

    # rope + non-rope string: same re-association when h.head is deeper
    # than h.tail
    RopeString(h::RopeString, t::String) =
        depth(h.tail) < depth(h.head) ?
            RopeString(h.head, RopeString(h.tail, t)) :
            new(h, t, h.depth+1, length(h)+length(t))

    # non-rope string + rope: mirror image, re-associating as
    # (h * t.head) * t.tail when t.tail is deeper than t.head
    RopeString(h::String, t::RopeString) =
        depth(t.head) < depth(t.tail) ?
            RopeString(RopeString(h, t.head), t.tail) :
            new(h, t, t.depth+1, length(h)+length(t))

    # two non-rope strings form a node of depth 1
    RopeString(h::String, t::String) =
        new(h, t, 1, length(h)+length(t))
end

# Tree depth: 0 for any non-rope string, the cached field for a rope.
function depth(s::String)
    return 0
end

function depth(s::RopeString)
    return s.depth
end

# Read index i of a rope. Indices up to length(head) are served by the
# head; larger ones are shifted into the tail and the resulting next index
# is shifted back.
function next(s::RopeString, i::Int)
    nhead = length(s.head)
    if i > nhead
        ch, after = next(s.tail, i-nhead)
        return ch, after+nhead
    end
    return next(s.head, i)
end

# length is cached in the node; strlen is the sum over both parts.
function length(s::RopeString)
    return s.length
end

function strlen(s::RopeString)
    return strlen(s.head) + strlen(s.tail)
end

# Concatenation. With no arguments the result is the empty string, and a
# single string is returned as is.
function strcat()
    return ""
end

function strcat(s::String)
    return s
end

# Arbitrary arguments are converted with string first.
function strcat(x...)
    return strcat(map(string,x)...)
end

# Two or more strings: concatenate the remaining ones first, then join
# with s. An empty side is dropped so no rope node is built for it.
function strcat(s::String, t::String...)
    rest = strcat(t...)
    if isempty(s)
        return rest
    elseif isempty(rest)
        return s
    end
    return RopeString(s, rest)
end

# Printing a rope prints its head and then its tail.
function print(s::RopeString)
    print(s.head, s.tail)
end

## transformed strings ##

# A string whose characters are passed through a function when read. The
# function receives the character and the index it was read from.
type TransformedString <: String
    transform::Function
    string::String
end

# The transformation works per character, so both size measures are those
# of the wrapped string.
function length(s::TransformedString)
    return length(s.string)
end

function strlen(s::TransformedString)
    return strlen(s.string)
end

# Read the wrapped string at i and apply the transformation to the result.
function next(s::TransformedString, i::Int)
    raw, after = next(s.string,i)
    mapped = s.transform(raw, i)
    return mapped, after
end

## uppercase and lowercase transformations ##

# Case conversion of a single character through the C functions towupper
# and towlower.
function uppercase(c::Char)
    return ccall(:towupper, Char, (Char,), c)
end

function lowercase(c::Char)
    return ccall(:towlower, Char, (Char,), c)
end

# Case conversion of a string wraps it in a TransformedString that
# converts each character as it is read.
function uppercase(s::String)
    return TransformedString((c,i)->uppercase(c), s)
end

function lowercase(s::String)
    return TransformedString((c,i)->lowercase(c), s)
end

# ucfirst/lcfirst convert only the character read at index 1.
function ucfirst(s::String)
    return TransformedString((c,i)->i==1 ? uppercase(c) : c, s)
end

function lcfirst(s::String)
    return TransformedString((c,i)->i==1 ? lowercase(c) : c, s)
end

# Short aliases.
const uc = uppercase
const lc = lowercase

## string map ##

# Apply f to every character of s and collect the results in a new string.
# f must return a Char; the type assertion enforces this.
function map(f::Function, s::String)
    buf = memio(length(s))
    for c in s
        mapped = f(c)::Char
        write(buf, mapped)
    end
    return takebuf_string(buf)
end

## conversion of general objects to strings ##

# Generic conversions: string(x) captures what show prints for x, and
# cstring(x...) captures what print prints for its arguments.
function string(x)
    return print_to_string(show, x)
end

function cstring(x...)
    return print_to_string(print, x...)
end

# Convert a pointer to bytes into a ByteString via jl_cstr_to_string.
# A NULL pointer is rejected with an error.
function cstring(p::Ptr{Uint8})
    if p == C_NULL
        error("cannot convert NULL to string")
    end
    return ccall(:jl_cstr_to_string, Any, (Ptr{Uint8},), p)::ByteString
end

## string promotion rules ##

# Promotion between the string types named here always results in
# UTF8String.
promote_rule(::Type{UTF8String} , ::Type{ASCIIString}) = UTF8String
promote_rule(::Type{UTF8String} , ::Type{CharString} ) = UTF8String
promote_rule(::Type{ASCIIString}, ::Type{CharString} ) = UTF8String

## printing literal quoted string data ##

# TODO: this is really the inverse of print_unbackslashed

# Print s between double quotes. Only embedded double quotes are escaped
# (with a backslash); every other character is printed unchanged.
function print_quoted_literal(s::String)
    print('"')
    for c in s
        if c == '"'
            print("\\\"")
        else
            print(c)
        end
    end
    print('"')
end

## string escaping & unescaping ##

# Choose the escape text for a NUL character, given the index i of the
# character that follows it. When that character is an octal digit the
# longer form \x00 is used; otherwise \0 is used.
function escape_nul(s::String, i::Int)
    if !done(s,i) && '0' <= next(s,i)[1] <= '7'
        return L"\x00"
    end
    return L"\0"
end

# True when c is one of 0-9, a-f or A-F.
function is_hex_digit(c::Char)
    return ('0' <= c <= '9') || ('a' <= c <= 'f') || ('A' <= c <= 'F')
end
# True when there is a character at index i of s and it is a hex digit.
function need_full_hex(s::String, i::Int)
    if done(s,i)
        return false
    end
    return is_hex_digit(next(s,i)[1])
end

# Print s with escape sequences substituted for special characters.
# `esc` lists additional characters that are to be preceded by a backslash.
# For each character the first matching rule below applies:
#   NUL                -> whatever escape_nul picks for the following character
#   escape (\e)        -> the two characters \e
#   backslash          -> two backslashes
#   member of esc      -> backslash followed by the character
#   iswprint(c)        -> the character itself
#   code 7 through 13  -> backslash plus the matching letter of "abtnvfr"
#   up to \x7f         -> \x followed by hex(c, 2)
#   up to \uffff       -> \u followed by hex(c, 4) or hex(c, 2)
#   anything else      -> \U followed by hex(c, 8) or hex(c, 4)
# For \u and \U the larger second argument to hex is used when the next
# character of s is itself a hex digit (see need_full_hex).
function print_escaped(s::String, esc::String)
    i = start(s)
    while !done(s,i)
        # j is the index of the character after c; the rules that look ahead use it
        c, j = next(s,i)
        c == '\0'       ? print(escape_nul(s,j)) :
        c == '\e'       ? print(L"\e") :
        c == '\\'       ? print("\\\\") :
        contains(esc,c) ? print('\\', c) :
        iswprint(c)     ? print(c) :
        7 <= c <= 13    ? print('\\', "abtnvfr"[c-6]) :
        c <= '\x7f'     ? print(L"\x", hex(c, 2)) :
        c <= '\uffff'   ? print(L"\u", hex(c, need_full_hex(s,j) ? 4 : 2)) :
                          print(L"\U", hex(c, need_full_hex(s,j) ? 8 : 4))
        i = j
    end
end

# Return s with escapes applied; a double quote is the only extra
# character that gets a backslash.
function escape_string(s::String)
    return print_to_string(length(s), print_escaped, s, "\"")
end

# Print s in double quotes, escaping double quotes and dollar signs in
# addition to what print_escaped always escapes.
function print_quoted(s::String)
    print('"')
    print_escaped(s, "\"\$")
    print('"')
end
#"  # work around syntax highlighting problem

# Return the quoted form of s as a string.
function quote_string(s::String)
    return print_to_string(length(s)+2, print_quoted, s)
end

# bare minimum unescaping function unescapes only given characters

# Print s with the backslash removed in front of every character listed in
# `esc`. The backslash itself is always treated as listed, so a doubled
# backslash prints as a single one. Other backslashes are printed as is.
function print_unescaped_chars(s::String, esc::String)
    if !contains(esc,'\\')
        esc = strcat("\\", esc)
    end
    pos = start(s)
    while !done(s,pos)
        ch, pos = next(s,pos)
        # a backslash followed by a listed character: skip the backslash
        # and print the character that follows it
        if ch == '\\' && !done(s,pos) && contains(esc,s[pos])
            ch, pos = next(s,pos)
        end
        print(ch)
    end
end

# String-returning form of print_unescaped_chars.
function unescape_chars(s::String, esc::String)
    return print_to_string(length(s), print_unescaped_chars, s, esc)
end

# general unescaping of traditional C and Unicode escape sequences

# Print s with backslash escape sequences replaced by what they denote.
# Handled forms:
#   \x, \u, \U followed by up to 2, 4 or 8 hex digits respectively
#   \ followed by up to three octal digits (value must not exceed 255)
#   \a \b \t \n \v \f \r \e
# Any other escaped character is printed as itself. A backslash that is the
# last character of s is printed unchanged.
function print_unescaped(s::String)
    i = start(s)
    while !done(s,i)
        c, i = next(s,i)
        if !done(s,i) && c == '\\'
            c, i = next(s,i)
            if c == 'x' || c == 'u' || c == 'U'
                # n accumulates the value, k counts loop passes,
                # m is the maximum number of digits for this escape
                n = k = 0
                m = c == 'x' ? 2 :
                    c == 'u' ? 4 : 8
                while (k+=1) <= m && !done(s,i)
                    c, j = next(s,i)
                    # a non-hex character ends the sequence without being consumed
                    n = '0' <= c <= '9' ? n<<4 + c-'0' :
                        'a' <= c <= 'f' ? n<<4 + c-'a'+10 :
                        'A' <= c <= 'F' ? n<<4 + c-'A'+10 : break
                    i = j
                end
                # k is still 1 when no digit was consumed
                # (this message is used for \u and \U as well)
                if k == 1
                    error("\\x used with no following hex digits")
                end
                if m == 2 # \x escape sequence
                    # \x produces a raw byte
                    write(uint8(n))
                else
                    # \u and \U produce a character
                    print(char(n))
                end
            elseif '0' <= c <= '7'
                # octal escape: the first digit is already in c, read up to two more
                k = 1
                n = c-'0'
                while (k+=1) <= 3 && !done(s,i)
                    c, j = next(s,i)
                    n = '0' <= c <= '7' ? n<<3 + c-'0' : break
                    i = j
                end
                if n > 255
                    error("octal escape sequence out of range")
                end
                # octal escapes produce a raw byte
                write(uint8(n))
            else
                # single-letter escapes; unknown ones stand for the letter itself
                print(c == 'a' ? '\a' :
                      c == 'b' ? '\b' :
                      c == 't' ? '\t' :
                      c == 'n' ? '\n' :
                      c == 'v' ? '\v' :
                      c == 'f' ? '\f' :
                      c == 'r' ? '\r' :
                      c == 'e' ? '\e' : c)
            end
        else
            print(c)
        end
    end
end

# String-returning form of print_unescaped.
unescape_string(s::String) = print_to_string(length(s), print_unescaped, s)

## checking UTF-8 & ASCII validity ##

# Classify the bytes of s using the C routine u8_isvalid. Result codes:
#   0: neither valid ASCII nor UTF-8
#   1: valid ASCII
#   2: valid UTF-8
function byte_string_classify(s::ByteString)
    return ccall(:u8_isvalid, Int32, (Ptr{Uint8}, Int), s.data, length(s))
end

# ASCII validity requires class 1; UTF-8 validity accepts any non-zero class.
function is_valid_ascii(s::ByteString)
    return byte_string_classify(s) == 1
end

function is_valid_utf8 (s::ByteString)
    return byte_string_classify(s) != 0
end

# The check_ functions return s unchanged when it is valid and raise an
# error otherwise.
function check_ascii(s::ByteString)
    if is_valid_ascii(s)
        return s
    end
    error("invalid ASCII sequence")
end

function check_utf8 (s::ByteString)
    if is_valid_utf8(s)
        return s
    end
    error("invalid UTF-8 sequence")
end

## string interpolation parsing ##

# Split the body of a string literal into literal pieces and interpolated
# expressions.
#   s        - the raw text of the literal
#   unescape - applied to each literal piece before it is stored
#   printer  - function placed in the generated print_to_string call
# Returns the single piece itself when there is exactly one and it is a
# ByteString; otherwise an expression calling print_to_string with the
# printer and all pieces.
function _jl_interp_parse(s::String, unescape::Function, printer::Function)
    sx = {}
    # i marks the start of the pending literal piece, j is the scan position
    i = j = start(s)
    while !done(s,j)
        c, k = next(s,j)
        if c == '$'
            # flush the literal text collected so far, then let the parser
            # read one atom starting right after the dollar sign
            if !isempty(s[i:j-1])
                push(sx, unescape(s[i:j-1]))
            end
            ex, j = parseatom(s,k)
            push(sx, ex)
            i = j
        elseif c == '\\' && !done(s,k)
            if s[k] == '$'
                # escaped dollar sign: flush what precedes the backslash and
                # start the next piece at the dollar sign, dropping the backslash
                if !isempty(s[i:j-1])
                    push(sx, unescape(s[i:j-1]))
                end
                i = k
            end
            # skip the escaped character so it is not examined on its own
            c, j = next(s,k)
        else
            j = k
        end
    end
    # flush the trailing literal piece, if any
    if !isempty(s[i:])
        push(sx, unescape(s[i:j-1]))
    end
    length(sx) == 1 && isa(sx[1],ByteString) ? sx[1] :
        expr(:call, :print_to_string, printer, sx...)
end

# Defaults: print is the printer, and literal pieces are unescaped with
# unescape_string and then checked with check_utf8.
_jl_interp_parse(s::String, u::Function) = _jl_interp_parse(s, u, print)
_jl_interp_parse(s::String) = _jl_interp_parse(s, x->check_utf8(unescape_string(x)))

# Variant of _jl_interp_parse for byte-oriented literals: pieces are
# unescaped with unescape_string only, and the generated call emits each
# piece with write instead of print.
function _jl_interp_parse_bytes(s::String)
    function writer(x...)
        for w in x
            write(w)
        end
    end
    return _jl_interp_parse(s, unescape_string, writer)
end

## core string macros ##

# String-literal macros. Each receives the raw text of the literal.
#   str, S_str - interpolation plus unescaping, result checked as UTF-8
#   I_str      - interpolation; only backslash-escaped double quotes (and
#                backslashes) are unescaped
#   E_str      - unescaping only, no interpolation, result checked as UTF-8
#   B_str      - interpolation with pieces written as bytes
#   b_str      - like B_str, but the expression yields the .data field of
#                the result
macro   str(s); _jl_interp_parse(s); end
macro S_str(s); _jl_interp_parse(s); end
macro I_str(s); _jl_interp_parse(s, x->unescape_chars(x,"\"")); end
macro E_str(s); check_utf8(unescape_string(s)); end
macro B_str(s); _jl_interp_parse_bytes(s); end
macro b_str(s); ex = _jl_interp_parse_bytes(s); :(($ex).data); end

## shell-like command parsing ##

# Split a command line into words using shell-like quoting rules.
#   s      - the command text
#   interp - when true, `$` introduces an interpolated expression
# Each word is collected as a list of parts (literal substrings and, with
# interp, parsed expressions). With interp == false the list of words is
# returned directly; otherwise a tuple expression holding one tuple
# expression per word is returned.
# Errors are raised for unterminated quotes, a dangling backslash, and a
# `$` that is followed by whitespace or by the end of the command.
function _jl_shell_parse(s::String, interp::Bool)

    in_single_quotes = false
    in_double_quotes = false

    args = {}       # finished words
    arg = {}        # parts of the word currently being built
    i = start(s)    # start of the literal text not yet added to arg
    j = i           # scan position

    # add a part to the current word; empty strings are dropped
    function update_arg(x)
        if !isa(x,String) || !isempty(x)
            push(arg, x)
        end
    end
    # finish the current word; a word without parts becomes one empty string
    function append_arg()
        if isempty(arg); arg = {"",}; end
        push(args, arg)
        arg = {}
    end

    while !done(s,j)
        c, k = next(s,j)
        if !in_single_quotes && !in_double_quotes && iswspace(c)
            # unquoted whitespace ends the word; skip the rest of the run
            update_arg(s[i:j-1])
            append_arg()
            j = k
            while !done(s,j)
                c, k = next(s,j)
                if !iswspace(c)
                    i = j
                    break
                end
                j = k
            end
        elseif interp && !in_single_quotes && c == '$'
            # interpolation: flush pending text, then parse one atom after the `$`
            update_arg(s[i:j-1]); i = k; j = k
            if done(s,k)
                error("\$ right before end of command")
            end
            if iswspace(s[k])
                error("space not allowed right after \$")
            end
            ex, j = parseatom(s,j)
            update_arg(ex); i = j
        else
            if !in_double_quotes && c == '\''
                # toggle single quoting; the quote character itself is dropped
                in_single_quotes = !in_single_quotes
                update_arg(s[i:j-1]); i = k
            elseif !in_single_quotes && c == '"'
                # toggle double quoting; the quote character itself is dropped
                in_double_quotes = !in_double_quotes
                update_arg(s[i:j-1]); i = k
            elseif c == '\\'
                if in_double_quotes
                    if done(s,k)
                        error("unterminated double quote")
                    end
                    # inside double quotes a backslash only escapes `"` and `$`
                    if s[k] == '"' || s[k] == '$'
                        update_arg(s[i:j-1]); i = k
                        c, k = next(s,k)
                    end
                elseif !in_single_quotes
                    if done(s,k)
                        error("dangling backslash")
                    end
                    # outside quotes a backslash escapes whatever follows it
                    update_arg(s[i:j-1]); i = k
                    c, k = next(s,k)
                end
            end
            j = k
        end
    end

    if in_single_quotes; error("unterminated single quote"); end
    if in_double_quotes; error("unterminated double quote"); end

    # flush the final word
    update_arg(s[i:])
    append_arg()

    if !interp
        return args
    end

    # construct an expression
    exprs = {}
    for arg in args
        push(exprs, expr(:tuple, arg))
    end
    expr(:tuple,exprs)
end
# Interpolation is enabled by default.
_jl_shell_parse(s::String) = _jl_shell_parse(s,true)

# Split a command line into an array of word strings using the shell
# parser without interpolation. The parts of each word are concatenated.
function shell_split(s::String)
    words = String[]
    for pieces in _jl_shell_parse(s,false)
        push(words, strcat(pieces...))
    end
    return words
end

# Print one word quoted as needed for the shell-like parser:
#   - an empty word prints as ''
#   - a word without special characters prints as is
#   - a word with special characters but no single quote is wrapped in
#     single quotes
#   - otherwise it is wrapped in double quotes, with `"` and `$` preceded
#     by a backslash
# Special characters are whitespace, backslash, both quote characters and
# the dollar sign.
function print_shell_word(word::String)
    if isempty(word)
        print("''")
    end
    needs_quoting = false
    contains_single = false
    for ch in word
        if ch == '\''
            needs_quoting = true
            contains_single = true
        elseif iswspace(ch) || ch=='\\' || ch=='"' || ch=='$'
            needs_quoting = true
        end
    end
    if !needs_quoting
        print(word)
    elseif !contains_single
        print('\'', word, '\'')
    else
        print('"')
        for ch in word
            if ch == '"' || ch == '$'
                print('\\')
            end
            print(ch)
        end
        print('"')
    end
end

# Print a command and its arguments as shell words separated by single
# spaces.
function print_shell_escaped(cmd::String, args::String...)
    print_shell_word(cmd)
    for a in args
        print(' ')
        print_shell_word(a)
    end
end

# String-returning form of print_shell_escaped.
function shell_escape(cmd::String, args::String...)
    return print_to_string(print_shell_escaped, cmd, args...)
end

## interface to parser ##

# Parse one expression from s beginning at the 1-based position pos.
# `greedy` is passed to the C parser as 1 or 0.
# Returns (expr, end_pos) with end_pos 1-based. Throws ParseError when the
# parser reports an error or when it returns ().
function parse(s::String, pos, greedy)
    flag = greedy ? 1:0
    ex, cpos = ccall(:jl_parse_string, Any,
                     (Ptr{Uint8}, Int32, Int32),
                     cstring(s), pos-1, flag)
    if isa(ex,Expr) && is(ex.head,:error)
        throw(ParseError(ex.args[1]))
    end
    if ex == ()
        throw(ParseError("end of input"))
    end
    return ex, cpos+1 # C is zero-based, Julia is 1-based
end

# Convenience forms: parse is greedy, parseatom is not; the position
# defaults to 1.
function parse(s::String)
    return parse(s, 1, true)
end

function parse(s::String, pos)
    return parse(s, pos, true)
end

function parseatom(s::String)
    return parse(s, 1, false)
end

function parseatom(s::String, pos)
    return parse(s, pos, false)
end

## miscellaneous string functions ##

# Pad s on the left with copies of p until it is n characters long.
# s is returned unchanged when it is already at least n characters long.
# A multi-character p is repeated whole as often as it fits and then cut
# to fill the remainder.
function lpad(s::String, n::Integer, p::String)
    deficit = n - strlen(s)
    if deficit <= 0
        return s
    end
    padlen = strlen(p)
    if padlen == 1
        return p^deficit * s
    end
    whole = div(deficit,padlen)
    extra = deficit - whole*padlen
    return cstring(p^whole*p[1:chr2ind(p,extra)]*s)
end

# Same as lpad, but the padding is appended on the right.
function rpad(s::String, n::Integer, p::String)
    deficit = n - strlen(s)
    if deficit <= 0
        return s
    end
    padlen = strlen(p)
    if padlen == 1
        return s * p^deficit
    end
    whole = div(deficit,padlen)
    extra = deficit - whole*padlen
    return cstring(s*p^whole*p[1:chr2ind(p,extra)])
end

# Arbitrary values are converted with string before padding.
function lpad(s, n::Integer, p)
    return lpad(string(s), n, string(p))
end

function rpad(s, n::Integer, p)
    return rpad(string(s), n, string(p))
end

# Without an explicit pad string a single space is used.
function lpad(s, n::Integer)
    return lpad(string(s), n, " ")
end

function rpad(s, n::Integer)
    return rpad(string(s), n, " ")
end

# Split s at every character contained in `delims` and return the tokens
# as an array of strings. Empty tokens are kept only when include_empty is
# true.
function split(s::String, delims, include_empty::Bool)
    i = 1
    strs = String[]
    len = length(s)
    while true
        # scan one token: tokend follows i until a delimiter is read, at
        # which point i has moved past the delimiter but tokend has not
        tokstart = tokend = i
        while !done(s,i)
            (c,i) = next(s,i)
            if contains(delims, c)
                break
            end
            tokend = i
        end
        tok = s[tokstart:(tokend-1)]
        if include_empty || !isempty(tok)
            push(strs, tok)
        end
        # keep going while input remains, or when the input ended right
        # after a delimiter (tokend != i), so that the trailing empty token
        # is visited as well
        if !((i <= len) || (i==len+1 && tokend!=i))
            break
        end
    end
    strs
end

# Defaults: with no delimiters given, split on whitespace characters and
# drop empty tokens; with delimiters given, keep empty tokens. A single
# Char delimiter is wrapped in a tuple.
split(s::String) = split(s, (' ','\t','\n','\v','\f','\r'), false)
split(s::String, x) = split(s, x, true)
split(s::String, x::Char, incl::Bool) = split(s, (x,), incl)

# Print the items of `strings` separated by `delim`, except that `last` is
# used as the separator in front of the final item. Nothing is printed for
# an empty collection.
function print_joined(strings, delim, last)
    state = start(strings)
    if !done(strings,state)
        item, state = next(strings,state)
        print(item)
        while !done(strings,state)
            item, state = next(strings,state)
            if done(strings,state)
                print(last)
            else
                print(delim)
            end
            print(item)
        end
    end
end

# Print the items of `strings` separated by `delim`.
function print_joined(strings, delim)
    state = start(strings)
    if done(strings,state)
        return
    end
    item, state = next(strings,state)
    print(item)
    while !done(strings,state)
        item, state = next(strings,state)
        print(delim)
        print(item)
    end
end

# Without a delimiter the items are printed back to back.
function print_joined(strings)
    print_joined(strings, "")
end

# join returns what print_joined prints for the same arguments.
function join(args...)
    return print_to_string(print_joined, args...)
end

# chop removes the final character of s. An empty string has no final
# character and is returned unchanged, which avoids slicing with the range
# 1:-1.
function chop(s::String)
    if isempty(s)
        return s
    end
    return s[1:thisind(s,length(s))-1]
end

# chomp removes a single trailing newline if there is one. An empty string
# is returned unchanged; without this guard the last-character lookup
# would index position 0.
function chomp(s::String)
    if isempty(s)
        return s
    end
    i = thisind(s,length(s))
    return s[i]=='\n' ? s[1:i-1] : s
end

# Byte strings test the last byte directly (0x0a is the newline byte). The
# empty case is guarded so the empty data array is never indexed.
function chomp(s::ByteString)
    if isempty(s)
        return s
    end
    return s.data[end]==0x0a ? s[1:end-1] : s
end

# Remove leading whitespace. Returns "" when s consists of whitespace only
# (or is empty).
function lstrip(s::String)
    pos = start(s)
    while !done(s,pos)
        ch, after = next(s,pos)
        if !iswspace(ch)
            return s[pos:end]
        end
        pos = after
    end
    return ""
end

# Remove trailing whitespace by scanning the reversed string for its first
# non-whitespace character. Returns "" when s consists of whitespace only
# (or is empty).
function rstrip(s::String)
    rev = reverse(s)
    pos = start(rev)
    while !done(rev,pos)
        ch, after = next(rev,pos)
        if !iswspace(ch)
            # reversed index pos corresponds to index end-pos+1 of s
            return s[1:end-pos+1]
        end
        pos = after
    end
    return ""
end

# strip removes whitespace from both ends.
function strip(s::String)
    return lstrip(rstrip(s))
end

## string to integer functions ##

# Parse s as an integer of type T written in the given base (2 to 36).
# Digits above 9 may be given as upper- or lower-case letters. A leading
# '-' is accepted only when T is a signed type.
# Errors are raised for an invalid base, an empty string, a sign with
# nothing after it, and any character that is not a digit of the base.
function parse_int{T<:Integer}(::Type{T}, s::String, base::Integer)
    if !(2 <= base <= 36); error("invalid base: ",base); end
    i = start(s)
    if done(s,i)
        error("premature end of integer (in ",show_to_string(s),")")
    end
    c,i = next(s,i)
    sgn = one(T)
    if T <: Signed && c == '-'
        sgn = -sgn
        if done(s,i)
            error("premature end of integer (in ",show_to_string(s),")")
        end
        c,i = next(s,i)
    end
    base = convert(T,base)
    n::T = 0
    while true
        # digit value of c; characters that are not digits map to
        # typemax(Int) so that the range check below rejects them
        d = '0' <= c <= '9' ? c-'0' :
            'A' <= c <= 'Z' ? c-'A'+10 :
            'a' <= c <= 'z' ? c-'a'+10 : typemax(Int)
        if d >= base
            error(show_to_string(c)," is not a valid digit (in ",show_to_string(s),")")
        end
        # TODO: overflow detection?
        n = n*base + d
        if done(s,i)
            break
        end
        c,i = next(s,i)
    end
    # apply the sign recorded above
    return flipsign(n,sgn)
end

# Convenience forms of parse_int: the result type defaults to Int and the
# base defaults to 10.
parse_int(s::String, base::Integer) = parse_int(Int,s,base)
parse_int(T::Type, s::String)       = parse_int(T,s,10)
parse_int(s::String)                = parse_int(Int,s,10)

# Fixed-base parsers (binary, octal, hexadecimal) with an explicit result type.
parse_bin(T::Type, s::String) = parse_int(T,s,2)
parse_oct(T::Type, s::String) = parse_int(T,s,8)
parse_hex(T::Type, s::String) = parse_int(T,s,16)

# Fixed-base parsers producing Int.
parse_bin(s::String) = parse_int(Int,s,2)
parse_oct(s::String) = parse_int(Int,s,8)
parse_hex(s::String) = parse_int(Int,s,16)

# Integer conversion functions applied to a string parse it as a base-10
# number of the corresponding type.
integer (s::String) = int(s)
unsigned(s::String) = uint(s)
int     (s::String) = parse_int(Int,s)
uint    (s::String) = parse_int(Uint,s)
int8    (s::String) = parse_int(Int8,s)
uint8   (s::String) = parse_int(Uint8,s)
int16   (s::String) = parse_int(Int16,s)
uint16  (s::String) = parse_int(Uint16,s)
int32   (s::String) = parse_int(Int32,s)
uint32  (s::String) = parse_int(Uint32,s)
int64   (s::String) = parse_int(Int64,s)
uint64  (s::String) = parse_int(Uint64,s)

## integer to string functions ##

# Digit characters for bases up to 36, as bytes; entry k+1 is the symbol
# for digit value k.
const _jl_dig_syms = "0123456789abcdefghijklmnopqrstuvwxyz".data

# Convert n to its textual representation in base b (2 to 36), padded
# with leading zeros to at least l digits. Negative numbers get a leading
# '-' that is not counted in l. Returns an ASCIIString.
function int2str(n::Union(Int64,Uint64), b::Integer, l::Int)
    if b < 2 || b > 36; error("int2str: invalid base ", b); end
    neg = n < 0
    n = unsigned(abs(n))
    b = convert(typeof(n), b)
    ndig = ndigits(n, b)
    # buffer size: digits (or the requested minimum) plus one for the sign
    sz = max(convert(Int, ndig), l) + neg
    data = Array(Uint8, sz)
    # digits are produced from least significant to most significant, so
    # the buffer is filled from the end; slot 1 is left for the sign if any
    i = sz
    if ispow2(b)
        # power-of-two base: extract digits with a mask and a shift
        digmask = b-1
        shift = trailing_zeros(b)
        while i > neg
            ch = n & digmask
            data[i] = _jl_dig_syms[int(ch)+1]
            n >>= shift
            i -= 1
        end
    else
        # general base: extract digits with remainder and division
        while i > neg
            ch = n % b
            data[i] = _jl_dig_syms[int(ch)+1]
            n = div(n,b)
            i -= 1
        end
    end
    if neg
        data[1] = '-'
    end
    ASCIIString(data)
end
# Without a minimum width, 0 is used; other Integer types are converted to
# Int64 first.
int2str(n::Integer, b::Integer)         = int2str(n, b, 0)
int2str(n::Integer, b::Integer, l::Int) = int2str(int64(n), b, l)

# Signed integers convert to strings through dec after widening to Int64;
# string and cstring give the same result.
function string(x::Signed)
    return dec(int64(x))
end

function cstring(x::Signed)
    return dec(int64(x))
end

## string to float functions ##

# Try to parse s as a Float64 with the C routine jl_strtod, which stores
# its result through `out`. Returns true when the routine returns 0.
function float64_isvalid(s::String, out::Array{Float64,1})
    cs = cstring(s)
    status = ccall(:jl_strtod, Int32, (Ptr{Uint8},Ptr{Float64}), cs, out)
    return status == 0
end

# Float32 counterpart, using jl_strtof.
function float32_isvalid(s::String, out::Array{Float32,1})
    cs = cstring(s)
    status = ccall(:jl_strtof, Int32, (Ptr{Uint8},Ptr{Float32}), cs, out)
    return status == 0
end

# float64(s) and float32(s) parse a string into a floating-point number
# and throw ArgumentError when the text is not a valid number.
# Both functions share one-element scratch arrays that are local to this
# block; the parsed value is read back from element 1.
begin
    local tmp::Array{Float64,1} = Array(Float64,1)
    local tmpf::Array{Float32,1} = Array(Float32,1)
    global float64, float32
    function float64(s::String)
        if !float64_isvalid(s, tmp)
            throw(ArgumentError("float64(String): invalid number format"))
        end
        return tmp[1]
    end

    function float32(s::String)
        if !float32_isvalid(s, tmpf)
            throw(ArgumentError("float32(String): invalid number format"))
        end
        return tmpf[1]
    end
end

# float and parse_float default to Float64; parse_float with an explicit
# type selects the matching parser.
float(x::String) = float64(x)
parse_float(x::String) = float64(x)
parse_float(::Type{Float64}, x::String) = float64(x)
parse_float(::Type{Float32}, x::String) = float32(x)

# copying a byte string (generally not needed due to "immutability")

# Build a new string of the same type as s from a copy of its data.
function strcpy{T<:ByteString}(s::T)
    return T(copy(s.data))
end

# lexicographically compare byte arrays (used by Latin-1 and UTF-8)

# Three-way comparison of two byte arrays. The common prefix is compared
# with the C function memcmp; when it is identical, the lengths decide.
# Returns -1, 0 or +1.
function lexcmp(a::Array{Uint8,1}, b::Array{Uint8,1})
    nbytes = min(length(a),length(b))
    c = ccall(:memcmp, Int32, (Ptr{Uint8}, Ptr{Uint8}, Uint),
              a, b, nbytes)
    if c < 0
        return -1
    elseif c > 0
        return +1
    end
    return cmp(length(a),length(b))
end

# find the index of the first occurrence of a byte value in a byte array

# Locate the byte value b in a using the C function memchr. Returns the
# 1-based index of the first match, or 0 when memchr returns NULL.
function memchr(a::Array{Uint8,1}, b::Integer)
    base = pointer(a)
    hit = ccall(:memchr, Ptr{Uint8}, (Ptr{Uint8}, Int32, Uint), base, b, length(a))
    if hit == C_NULL
        return 0
    end
    # pointer difference is a 0-based offset; add 1 for a 1-based index
    return hit - base + 1
end

# concatenate byte arrays into a single array

# Concatenation of byte arrays. No arguments give an empty array; a single
# array gives a copy of it.
function memcat()
    return Array(Uint8,0)
end

function memcat(a::Array{Uint8,1})
    return copy(a)
end

# Several arrays: allocate the result once at the combined length, then
# copy each input into place with the C function memcpy.
function memcat(arrays::Array{Uint8,1}...)
    total = 0
    for a in arrays
        total += length(a)
    end
    result = Array(Uint8, total)
    dest = pointer(result)
    written = 0
    for a in arrays
        nbytes = length(a)
        ccall(:memcpy, Ptr{Uint8}, (Ptr{Uint8}, Ptr{Uint8}, Uint),
              dest+written, a, nbytes)
        written += nbytes
    end
    return result
end

# concatenate the data fields of byte strings

# Byte strings are concatenated through their data arrays; the result is a
# byte array.
function memcat(s::ByteString)
    return memcat(s.data)
end

function memcat(sx::ByteString...)
    return memcat(map(s->s.data, sx)...)
end