strings-and-runes: added comments, output and listed in examples
This commit is contained in:
parent
ff399c7001
commit
b2057ccfd2
@ -15,6 +15,7 @@ Variadic Functions
|
||||
Closures
|
||||
Recursion
|
||||
Pointers
|
||||
Strings and Runes
|
||||
Structs
|
||||
Methods
|
||||
Interfaces
|
||||
|
73
examples/strings-and-runes/strings-and-runes.go
Normal file
73
examples/strings-and-runes/strings-and-runes.go
Normal file
@ -0,0 +1,73 @@
|
||||
// A Go string is a read-only slice of bytes. The language
|
||||
// and the standard library treat strings specially - as
|
||||
// containers of text encoded in [UTF-8](https://en.wikipedia.org/wiki/UTF-8).
|
||||
// In other languages, strings are made of "characters".
|
||||
// In Go, the concept of a character is called a `rune` - it's
|
||||
// an integer that represents a Unicode code point.
|
||||
// [This Go blog post](https://go.dev/blog/strings) is a good
|
||||
// introduction to the topic.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
||||
// `s` is a `string` assigned a literal value
|
||||
// representing the word "hello" in the Thai
|
||||
// language. Go string literals are UTF-8
|
||||
// encoded text.
|
||||
const s = "สวัสดี"
|
||||
|
||||
// Since strings are equivalent to `[]byte`, this
|
||||
// will produce the length of the raw bytes stored within.
|
||||
fmt.Println("Len:", len(s))
|
||||
|
||||
// Indexing into a string produces the raw byte values at
|
||||
// each index. This loop generates the hex values of all
|
||||
// the bytes that constitute the code points in `s`.
|
||||
for i := 0; i < len(s); i++ {
|
||||
fmt.Printf("%x ", s[i])
|
||||
}
|
||||
fmt.Println()
|
||||
|
||||
// To count how many _runes_ are in a string, we can use
|
||||
// the `utf8` package. Note that the run-time of
|
||||
// `RuneCountInString` depends on the size of the string,
|
||||
// because it has to decode each UTF-8 rune sequentially.
|
||||
// Some Thai characters are represented by multiple Unicode
|
||||
// code points, so the result of this count may be surprising.
|
||||
fmt.Println("Rune count:", utf8.RuneCountInString(s))
|
||||
|
||||
// A `range` loop handles strings specially and decodes
|
||||
// each `rune` along with its offset in the string.
|
||||
for idx, runeValue := range s {
|
||||
fmt.Printf("%#U starts at %d\n", runeValue, idx)
|
||||
}
|
||||
|
||||
// We can achieve the same iteration by using the
|
||||
// `utf8.DecodeRuneInString` function explicitly.
|
||||
fmt.Println("\nUsing DecodeRuneInString")
|
||||
for i, w := 0, 0; i < len(s); i += w {
|
||||
runeValue, width := utf8.DecodeRuneInString(s[i:])
|
||||
fmt.Printf("%#U starts at %d\n", runeValue, i)
|
||||
w = width
|
||||
|
||||
// This demonstrates passing a `rune` value to a function.
|
||||
examineRune(runeValue)
|
||||
}
|
||||
}
|
||||
|
||||
func examineRune(r rune) {
|
||||
|
||||
// Values enclosed in single quotes are _rune literals_. We
|
||||
// can compare a `rune` value to a rune literal directly.
|
||||
if r == 't' {
|
||||
fmt.Println("found tee")
|
||||
} else if r == 'ส' {
|
||||
fmt.Println("found so sua")
|
||||
}
|
||||
}
|
2
examples/strings-and-runes/strings-and-runes.hash
Normal file
2
examples/strings-and-runes/strings-and-runes.hash
Normal file
@ -0,0 +1,2 @@
|
||||
c96321f2951af50985c648779a3a41d0b48007a7
|
||||
jDBFShEYIwP
|
20
examples/strings-and-runes/strings-and-runes.sh
Normal file
20
examples/strings-and-runes/strings-and-runes.sh
Normal file
@ -0,0 +1,20 @@
|
||||
$ go run strings-and-runes.go
|
||||
Len: 18
|
||||
e0 b8 aa e0 b8 a7 e0 b8 b1 e0 b8 aa e0 b8 94 e0 b8 b5
|
||||
Rune count: 6
|
||||
U+0E2A 'ส' starts at 0
|
||||
U+0E27 'ว' starts at 3
|
||||
U+0E31 'ั' starts at 6
|
||||
U+0E2A 'ส' starts at 9
|
||||
U+0E14 'ด' starts at 12
|
||||
U+0E35 'ี' starts at 15
|
||||
|
||||
Using DecodeRuneInString
|
||||
U+0E2A 'ส' starts at 0
|
||||
found so sua
|
||||
U+0E27 'ว' starts at 3
|
||||
U+0E31 'ั' starts at 6
|
||||
U+0E2A 'ส' starts at 9
|
||||
found so sua
|
||||
U+0E14 'ด' starts at 12
|
||||
U+0E35 'ี' starts at 15
|
@ -1,45 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// TODO: Thai hello, vowels
|
||||
|
||||
func main() {
|
||||
const hello = "สวัสดี"
|
||||
foo(hello)
|
||||
}
|
||||
|
||||
func foo(s string) {
|
||||
fmt.Println("Len:", len(s))
|
||||
|
||||
for i := 0; i < len(s); i++ {
|
||||
fmt.Printf("%x ", s[i])
|
||||
}
|
||||
fmt.Println()
|
||||
|
||||
fmt.Println("Rune count:", utf8.RuneCountInString(s))
|
||||
|
||||
for idx, runeValue := range s {
|
||||
fmt.Printf("%#U starts at %d\n", runeValue, idx)
|
||||
}
|
||||
|
||||
fmt.Println("\nUsing DecodeRuneInString")
|
||||
for i, w := 0, 0; i < len(s); i += w {
|
||||
runeValue, width := utf8.DecodeRuneInString(s[i:])
|
||||
fmt.Printf("%#U starts at %d\n", runeValue, i)
|
||||
w = width
|
||||
|
||||
examineRune(runeValue)
|
||||
}
|
||||
}
|
||||
|
||||
func examineRune(r rune) {
|
||||
if r == 't' {
|
||||
fmt.Println("found tee")
|
||||
} else if r == 'ส' {
|
||||
fmt.Println("found so sua")
|
||||
}
|
||||
}
|
2
public/index.html
generated
2
public/index.html
generated
@ -61,6 +61,8 @@
|
||||
|
||||
<li><a href="pointers">Pointers</a></li>
|
||||
|
||||
<li><a href="strings-and-runes">Strings and Runes</a></li>
|
||||
|
||||
<li><a href="structs">Structs</a></li>
|
||||
|
||||
<li><a href="methods">Methods</a></li>
|
||||
|
4
public/pointers
generated
4
public/pointers
generated
@ -14,7 +14,7 @@
|
||||
|
||||
|
||||
if (e.key == "ArrowRight") {
|
||||
window.location.href = 'structs';
|
||||
window.location.href = 'strings-and-runes';
|
||||
}
|
||||
|
||||
}
|
||||
@ -179,7 +179,7 @@ the memory address for that variable.</p>
|
||||
|
||||
|
||||
<p class="next">
|
||||
Next example: <a href="structs">Structs</a>.
|
||||
Next example: <a href="strings-and-runes">Strings and Runes</a>.
|
||||
</p>
|
||||
|
||||
|
||||
|
287
public/strings-and-runes
generated
Normal file
287
public/strings-and-runes
generated
Normal file
@ -0,0 +1,287 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Go by Example: Strings and Runes</title>
|
||||
<link rel=stylesheet href="site.css">
|
||||
</head>
|
||||
<script>
|
||||
onkeydown = (e) => {
|
||||
|
||||
if (e.key == "ArrowLeft") {
|
||||
window.location.href = 'pointers';
|
||||
}
|
||||
|
||||
|
||||
if (e.key == "ArrowRight") {
|
||||
window.location.href = 'structs';
|
||||
}
|
||||
|
||||
}
|
||||
</script>
|
||||
<body>
|
||||
<div class="example" id="strings-and-runes">
|
||||
<h2><a href="./">Go by Example</a>: Strings and Runes</h2>
|
||||
|
||||
<table>
|
||||
|
||||
<tr>
|
||||
<td class="docs">
|
||||
<p>A Go string is a read-only slice of bytes. The language
|
||||
and the standard library treat strings specially - as
|
||||
containers of text encoded in <a href="https://en.wikipedia.org/wiki/UTF-8">UTF-8</a>.
|
||||
In other languages, strings are made of “characters”.
|
||||
In Go, the concept of a character is called a <code>rune</code> - it’s
|
||||
an integer that represents a Unicode code point.
|
||||
<a href="https://go.dev/blog/strings">This Go blog post</a> is a good
|
||||
introduction to the topic.</p>
|
||||
|
||||
</td>
|
||||
<td class="code empty leading">
|
||||
|
||||
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td class="docs">
|
||||
|
||||
</td>
|
||||
<td class="code leading">
|
||||
<a href="http://play.golang.org/p/jDBFShEYIwP"><img title="Run code" src="play.png" class="run" /></a><img title="Copy code" src="clipboard.png" class="copy" />
|
||||
<pre class="chroma"><span class="kn">package</span> <span class="nx">main</span>
|
||||
</pre>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td class="docs">
|
||||
|
||||
</td>
|
||||
<td class="code leading">
|
||||
|
||||
<pre class="chroma"><span class="kn">import</span> <span class="p">(</span>
|
||||
<span class="s">"fmt"</span>
|
||||
<span class="s">"unicode/utf8"</span>
|
||||
<span class="p">)</span>
|
||||
</pre>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td class="docs">
|
||||
|
||||
</td>
|
||||
<td class="code leading">
|
||||
|
||||
<pre class="chroma"><span class="kd">func</span> <span class="nf">main</span><span class="p">()</span> <span class="p">{</span>
|
||||
</pre>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td class="docs">
|
||||
<p><code>s</code> is a <code>string</code> assigned a literal value
|
||||
representing the word “hello” in the Thai
|
||||
language. Go string literals are UTF-8
|
||||
encoded text.</p>
|
||||
|
||||
</td>
|
||||
<td class="code leading">
|
||||
|
||||
<pre class="chroma">
|
||||
<span class="kd">const</span> <span class="nx">s</span> <span class="p">=</span> <span class="s">"สวัสดี"</span>
|
||||
</pre>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td class="docs">
|
||||
<p>Since strings are equivalent to <code>[]byte</code>, this
|
||||
will produce the length of the raw bytes stored within.</p>
|
||||
|
||||
</td>
|
||||
<td class="code leading">
|
||||
|
||||
<pre class="chroma">
|
||||
<span class="nx">fmt</span><span class="p">.</span><span class="nf">Println</span><span class="p">(</span><span class="s">"Len:"</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="nx">s</span><span class="p">))</span>
|
||||
</pre>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td class="docs">
|
||||
<p>Indexing into a string produces the raw byte values at
|
||||
each index. This loop generates the hex values of all
|
||||
the bytes that constitute the code points in <code>s</code>.</p>
|
||||
|
||||
</td>
|
||||
<td class="code leading">
|
||||
|
||||
<pre class="chroma">
|
||||
<span class="k">for</span> <span class="nx">i</span> <span class="o">:=</span> <span class="mi">0</span><span class="p">;</span> <span class="nx">i</span> <span class="p">&lt;</span> <span class="nb">len</span><span class="p">(</span><span class="nx">s</span><span class="p">);</span> <span class="nx">i</span><span class="o">++</span> <span class="p">{</span>
|
||||
<span class="nx">fmt</span><span class="p">.</span><span class="nf">Printf</span><span class="p">(</span><span class="s">"%x "</span><span class="p">,</span> <span class="nx">s</span><span class="p">[</span><span class="nx">i</span><span class="p">])</span>
|
||||
<span class="p">}</span>
|
||||
<span class="nx">fmt</span><span class="p">.</span><span class="nf">Println</span><span class="p">()</span>
|
||||
</pre>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td class="docs">
|
||||
<p>To count how many <em>runes</em> are in a string, we can use
|
||||
the <code>utf8</code> package. Note that the run-time of
|
||||
<code>RuneCountInString</code> depends on the size of the string,
|
||||
because it has to decode each UTF-8 rune sequentially.
|
||||
Some Thai characters are represented by multiple Unicode
|
||||
code points, so the result of this count may be surprising.</p>
|
||||
|
||||
</td>
|
||||
<td class="code leading">
|
||||
|
||||
<pre class="chroma">
|
||||
<span class="nx">fmt</span><span class="p">.</span><span class="nf">Println</span><span class="p">(</span><span class="s">"Rune count:"</span><span class="p">,</span> <span class="nx">utf8</span><span class="p">.</span><span class="nf">RuneCountInString</span><span class="p">(</span><span class="nx">s</span><span class="p">))</span>
|
||||
</pre>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td class="docs">
|
||||
<p>A <code>range</code> loop handles strings specially and decodes
|
||||
each <code>rune</code> along with its offset in the string.</p>
|
||||
|
||||
</td>
|
||||
<td class="code leading">
|
||||
|
||||
<pre class="chroma">
|
||||
<span class="k">for</span> <span class="nx">idx</span><span class="p">,</span> <span class="nx">runeValue</span> <span class="o">:=</span> <span class="k">range</span> <span class="nx">s</span> <span class="p">{</span>
|
||||
<span class="nx">fmt</span><span class="p">.</span><span class="nf">Printf</span><span class="p">(</span><span class="s">"%#U starts at %d\n"</span><span class="p">,</span> <span class="nx">runeValue</span><span class="p">,</span> <span class="nx">idx</span><span class="p">)</span>
|
||||
<span class="p">}</span>
|
||||
</pre>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td class="docs">
|
||||
<p>We can achieve the same iteration by using the
|
||||
<code>utf8.DecodeRuneInString</code> function explicitly.</p>
|
||||
|
||||
</td>
|
||||
<td class="code leading">
|
||||
|
||||
<pre class="chroma">
|
||||
<span class="nx">fmt</span><span class="p">.</span><span class="nf">Println</span><span class="p">(</span><span class="s">"\nUsing DecodeRuneInString"</span><span class="p">)</span>
|
||||
<span class="k">for</span> <span class="nx">i</span><span class="p">,</span> <span class="nx">w</span> <span class="o">:=</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">;</span> <span class="nx">i</span> <span class="p">&lt;</span> <span class="nb">len</span><span class="p">(</span><span class="nx">s</span><span class="p">);</span> <span class="nx">i</span> <span class="o">+=</span> <span class="nx">w</span> <span class="p">{</span>
|
||||
<span class="nx">runeValue</span><span class="p">,</span> <span class="nx">width</span> <span class="o">:=</span> <span class="nx">utf8</span><span class="p">.</span><span class="nf">DecodeRuneInString</span><span class="p">(</span><span class="nx">s</span><span class="p">[</span><span class="nx">i</span><span class="p">:])</span>
|
||||
<span class="nx">fmt</span><span class="p">.</span><span class="nf">Printf</span><span class="p">(</span><span class="s">"%#U starts at %d\n"</span><span class="p">,</span> <span class="nx">runeValue</span><span class="p">,</span> <span class="nx">i</span><span class="p">)</span>
|
||||
<span class="nx">w</span> <span class="p">=</span> <span class="nx">width</span>
|
||||
</pre>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td class="docs">
|
||||
<p>This demonstrates passing a <code>rune</code> value to a function.</p>
|
||||
|
||||
</td>
|
||||
<td class="code leading">
|
||||
|
||||
<pre class="chroma">
|
||||
<span class="nf">examineRune</span><span class="p">(</span><span class="nx">runeValue</span><span class="p">)</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">}</span>
|
||||
</pre>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td class="docs">
|
||||
|
||||
</td>
|
||||
<td class="code leading">
|
||||
|
||||
<pre class="chroma"><span class="kd">func</span> <span class="nf">examineRune</span><span class="p">(</span><span class="nx">r</span> <span class="kt">rune</span><span class="p">)</span> <span class="p">{</span>
|
||||
</pre>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td class="docs">
|
||||
<p>Values enclosed in single quotes are <em>rune literals</em>. We
|
||||
can compare a <code>rune</code> value to a rune literal directly.</p>
|
||||
|
||||
</td>
|
||||
<td class="code">
|
||||
|
||||
<pre class="chroma">
|
||||
<span class="k">if</span> <span class="nx">r</span> <span class="o">==</span> <span class="sc">'t'</span> <span class="p">{</span>
|
||||
<span class="nx">fmt</span><span class="p">.</span><span class="nf">Println</span><span class="p">(</span><span class="s">"found tee"</span><span class="p">)</span>
|
||||
<span class="p">}</span> <span class="k">else</span> <span class="k">if</span> <span class="nx">r</span> <span class="o">==</span> <span class="sc">'ส'</span> <span class="p">{</span>
|
||||
<span class="nx">fmt</span><span class="p">.</span><span class="nf">Println</span><span class="p">(</span><span class="s">"found so sua"</span><span class="p">)</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">}</span>
|
||||
</pre>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
</table>
|
||||
|
||||
<table>
|
||||
|
||||
<tr>
|
||||
<td class="docs">
|
||||
|
||||
</td>
|
||||
<td class="code leading">
|
||||
|
||||
<pre class="chroma"><span class="gp">$</span> go run strings-and-runes.go
|
||||
<span class="go">Len: 18
|
||||
</span><span class="go">e0 b8 aa e0 b8 a7 e0 b8 b1 e0 b8 aa e0 b8 94 e0 b8 b5
|
||||
</span><span class="go">Rune count: 6
|
||||
</span><span class="go">U+0E2A 'ส' starts at 0
|
||||
</span><span class="go">U+0E27 'ว' starts at 3
|
||||
</span><span class="go">U+0E31 'ั' starts at 6
|
||||
</span><span class="go">U+0E2A 'ส' starts at 9
|
||||
</span><span class="go">U+0E14 'ด' starts at 12
|
||||
</span><span class="go">U+0E35 'ี' starts at 15</span></pre>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td class="docs">
|
||||
|
||||
</td>
|
||||
<td class="code">
|
||||
|
||||
<pre class="chroma"><span class="go">Using DecodeRuneInString
|
||||
</span><span class="go">U+0E2A 'ส' starts at 0
|
||||
</span><span class="go">found so sua
|
||||
</span><span class="go">U+0E27 'ว' starts at 3
|
||||
</span><span class="go">U+0E31 'ั' starts at 6
|
||||
</span><span class="go">U+0E2A 'ส' starts at 9
|
||||
</span><span class="go">found so sua
|
||||
</span><span class="go">U+0E14 'ด' starts at 12
|
||||
</span><span class="go">U+0E35 'ี' starts at 15</span></pre>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
</table>
|
||||
|
||||
|
||||
<p class="next">
|
||||
Next example: <a href="structs">Structs</a>.
|
||||
</p>
|
||||
|
||||
|
||||
<p class="footer">
|
||||
by <a href="https://markmcgranaghan.com">Mark McGranaghan</a> and <a href="https://eli.thegreenplace.net">Eli Bendersky</a> | <a href="https://github.com/mmcgrana/gobyexample">source</a> | <a href="https://github.com/mmcgrana/gobyexample#license">license</a>
|
||||
</p>
|
||||
|
||||
</div>
|
||||
<script>
|
||||
var codeLines = [];
|
||||
codeLines.push('');codeLines.push('package main\u000A');codeLines.push('import (\u000A \"fmt\"\u000A \"unicode/utf8\"\u000A)\u000A');codeLines.push('func main() {\u000A');codeLines.push(' const s \u003D \"สวัสดี\"\u000A');codeLines.push(' fmt.Println(\"Len:\", len(s))\u000A');codeLines.push(' for i :\u003D 0; i \u003C len(s); i++ {\u000A fmt.Printf(\"%x \", s[i])\u000A }\u000A fmt.Println()\u000A');codeLines.push(' fmt.Println(\"Rune count:\", utf8.RuneCountInString(s))\u000A');codeLines.push(' for idx, runeValue :\u003D range s {\u000A fmt.Printf(\"%#U starts at %d\\n\", runeValue, idx)\u000A }\u000A');codeLines.push(' fmt.Println(\"\\nUsing DecodeRuneInString\")\u000A for i, w :\u003D 0, 0; i \u003C len(s); i +\u003D w {\u000A runeValue, width :\u003D utf8.DecodeRuneInString(s[i:])\u000A fmt.Printf(\"%#U starts at %d\\n\", runeValue, i)\u000A w \u003D width\u000A');codeLines.push(' examineRune(runeValue)\u000A }\u000A}\u000A');codeLines.push('func examineRune(r rune) {\u000A');codeLines.push(' if r \u003D\u003D \'t\' {\u000A fmt.Println(\"found tee\")\u000A } else if r \u003D\u003D \'ส\' {\u000A fmt.Println(\"found so sua\")\u000A }\u000A}\u000A');codeLines.push('');codeLines.push('');
|
||||
</script>
|
||||
<script src="site.js" async></script>
|
||||
</body>
|
||||
</html>
|
2
public/structs
generated
2
public/structs
generated
@ -9,7 +9,7 @@
|
||||
onkeydown = (e) => {
|
||||
|
||||
if (e.key == "ArrowLeft") {
|
||||
window.location.href = 'pointers';
|
||||
window.location.href = 'strings-and-runes';
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user