strings-and-runes: added comments, output and listed in examples

This commit is contained in:
Eli Bendersky 2022-02-05 07:08:26 -08:00
parent ff399c7001
commit b2057ccfd2
9 changed files with 388 additions and 48 deletions

View File

@ -15,6 +15,7 @@ Variadic Functions
Closures
Recursion
Pointers
Strings and Runes
Structs
Methods
Interfaces

View File

@ -0,0 +1,73 @@
// A Go string is a read-only slice of bytes. The language
// and the standard library treat strings specially - as
// containers of text encoded in [UTF-8](https://en.wikipedia.org/wiki/UTF-8).
// In other languages, strings are made of "characters".
// In Go, the concept of a character is called a `rune` - it's
// an integer that represents a Unicode code point.
// [This Go blog post](https://go.dev/blog/strings) is a good
// introduction to the topic.
package main
import (
"fmt"
"unicode/utf8"
)
func main() {
// `s` is a `string` assigned a literal value
// representing the word "hello" in the Thai
// language. Go string literals are UTF-8
// encoded text.
const s = "สวัสดี"
// Since strings are equivalent to `[]byte`, this
// will produce the length of the raw bytes stored within.
fmt.Println("Len:", len(s))
// Indexing into a string produces the raw byte values at
// each index. This loop generates the hex values of all
// the bytes that constitute the code points in `s`.
for i := 0; i < len(s); i++ {
fmt.Printf("%x ", s[i])
}
fmt.Println()
// To count how many _runes_ are in a string, we can use
// the `utf8` package. Note that the run-time of
// `RuneCountInString` depends on the size of the string,
// because it has to decode each UTF-8 rune sequentially.
// Each Thai code point here is encoded as multiple bytes
// in UTF-8, so the result of this count may be surprising.
fmt.Println("Rune count:", utf8.RuneCountInString(s))
// A `range` loop handles strings specially and decodes
// each `rune` along with its offset in the string.
for idx, runeValue := range s {
fmt.Printf("%#U starts at %d\n", runeValue, idx)
}
// We can achieve the same iteration by using the
// `utf8.DecodeRuneInString` function explicitly.
fmt.Println("\nUsing DecodeRuneInString")
for i, w := 0, 0; i < len(s); i += w {
runeValue, width := utf8.DecodeRuneInString(s[i:])
fmt.Printf("%#U starts at %d\n", runeValue, i)
w = width
// This demonstrates passing a `rune` value to a function.
examineRune(runeValue)
}
}
// examineRune prints a short message when `r` is one of the
// two runes this example looks for, and prints nothing otherwise.
func examineRune(r rune) {
	// Values enclosed in single quotes are _rune literals_. A
	// `rune` value can be compared to them directly, here as
	// the cases of a `switch`.
	switch r {
	case 't':
		fmt.Println("found tee")
	case 'ส':
		fmt.Println("found so sua")
	}
}

View File

@ -0,0 +1,2 @@
c96321f2951af50985c648779a3a41d0b48007a7
jDBFShEYIwP

View File

@ -0,0 +1,20 @@
$ go run strings-and-runes.go
Len: 18
e0 b8 aa e0 b8 a7 e0 b8 b1 e0 b8 aa e0 b8 94 e0 b8 b5
Rune count: 6
U+0E2A 'ส' starts at 0
U+0E27 'ว' starts at 3
U+0E31 'ั' starts at 6
U+0E2A 'ส' starts at 9
U+0E14 'ด' starts at 12
U+0E35 'ี' starts at 15
Using DecodeRuneInString
U+0E2A 'ส' starts at 0
found so sua
U+0E27 'ว' starts at 3
U+0E31 'ั' starts at 6
U+0E2A 'ส' starts at 9
found so sua
U+0E14 'ด' starts at 12
U+0E35 'ี' starts at 15

View File

@ -1,45 +0,0 @@
package main
import (
"fmt"
"unicode/utf8"
)
// TODO: Thai hello, vowels
// main passes the Thai "hello" string literal straight to foo.
func main() {
	foo("สวัสดี")
}
// foo prints several views of s: its length in bytes, each byte in
// hex, its rune count, and every rune with its starting byte offset.
// The runes are listed twice, first with a range loop and then by
// calling utf8.DecodeRuneInString by hand; on the second pass each
// rune is also handed to examineRune.
func foo(s string) {
	fmt.Println("Len:", len(s))

	// Hex dump of the raw bytes, all on one line.
	for _, b := range []byte(s) {
		fmt.Printf("%x ", b)
	}
	fmt.Println()

	fmt.Println("Rune count:", utf8.RuneCountInString(s))

	// range over a string yields decoded runes and their byte offsets.
	for offset, r := range s {
		fmt.Printf("%#U starts at %d\n", r, offset)
	}

	fmt.Println("\nUsing DecodeRuneInString")
	// Manual decoding: step forward by the encoded size of each rune.
	for pos := 0; pos < len(s); {
		r, size := utf8.DecodeRuneInString(s[pos:])
		fmt.Printf("%#U starts at %d\n", r, pos)
		examineRune(r)
		pos += size
	}
}
// examineRune prints a short message when r is one of the two runes
// of interest and prints nothing for any other rune.
func examineRune(r rune) {
	switch r {
	case 't':
		fmt.Println("found tee")
	case 'ส':
		fmt.Println("found so sua")
	}
}

2
public/index.html generated
View File

@ -61,6 +61,8 @@
<li><a href="pointers">Pointers</a></li>
<li><a href="strings-and-runes">Strings and Runes</a></li>
<li><a href="structs">Structs</a></li>
<li><a href="methods">Methods</a></li>

4
public/pointers generated
View File

@ -14,7 +14,7 @@
if (e.key == "ArrowRight") {
window.location.href = 'structs';
window.location.href = 'strings-and-runes';
}
}
@ -179,7 +179,7 @@ the memory address for that variable.</p>
<p class="next">
Next example: <a href="structs">Structs</a>.
Next example: <a href="strings-and-runes">Strings and Runes</a>.
</p>

287
public/strings-and-runes generated Normal file
View File

@ -0,0 +1,287 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Go by Example: Strings and Runes</title>
<link rel=stylesheet href="site.css">
</head>
<script>
// Keyboard navigation between example pages: the left arrow key
// loads 'pointers' and the right arrow key loads 'structs'.
onkeydown = (e) => {
if (e.key == "ArrowLeft") {
window.location.href = 'pointers';
}
if (e.key == "ArrowRight") {
window.location.href = 'structs';
}
}
</script>
<body>
<div class="example" id="strings-and-runes">
<h2><a href="./">Go by Example</a>: Strings and Runes</h2>
<table>
<tr>
<td class="docs">
<p>A Go string is a read-only slice of bytes. The language
and the standard library treat strings specially - as
containers of text encoded in <a href="https://en.wikipedia.org/wiki/UTF-8">UTF-8</a>.
In other languages, strings are made of &ldquo;characters&rdquo;.
In Go, the concept of a character is called a <code>rune</code> - it&rsquo;s
an integer that represents a Unicode code point.
<a href="https://go.dev/blog/strings">This Go blog post</a> is a good
introduction to the topic.</p>
</td>
<td class="code empty leading">
</td>
</tr>
<tr>
<td class="docs">
</td>
<td class="code leading">
<a href="http://play.golang.org/p/jDBFShEYIwP"><img title="Run code" src="play.png" class="run" /></a><img title="Copy code" src="clipboard.png" class="copy" />
<pre class="chroma"><span class="kn">package</span> <span class="nx">main</span>
</pre>
</td>
</tr>
<tr>
<td class="docs">
</td>
<td class="code leading">
<pre class="chroma"><span class="kn">import</span> <span class="p">(</span>
<span class="s">&#34;fmt&#34;</span>
<span class="s">&#34;unicode/utf8&#34;</span>
<span class="p">)</span>
</pre>
</td>
</tr>
<tr>
<td class="docs">
</td>
<td class="code leading">
<pre class="chroma"><span class="kd">func</span> <span class="nf">main</span><span class="p">()</span> <span class="p">{</span>
</pre>
</td>
</tr>
<tr>
<td class="docs">
<p><code>s</code> is a <code>string</code> assigned a literal value
representing the word &ldquo;hello&rdquo; in the Thai
language. Go string literals are UTF-8
encoded text.</p>
</td>
<td class="code leading">
<pre class="chroma">
<span class="kd">const</span> <span class="nx">s</span> <span class="p">=</span> <span class="s">&#34;สวัสดี&#34;</span>
</pre>
</td>
</tr>
<tr>
<td class="docs">
<p>Since strings are equivalent to <code>[]byte</code>, this
will produce the length of the raw bytes stored within.</p>
</td>
<td class="code leading">
<pre class="chroma">
<span class="nx">fmt</span><span class="p">.</span><span class="nf">Println</span><span class="p">(</span><span class="s">&#34;Len:&#34;</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="nx">s</span><span class="p">))</span>
</pre>
</td>
</tr>
<tr>
<td class="docs">
<p>Indexing into a string produces the raw byte values at
each index. This loop generates the hex values of all
the bytes that constitute the code points in <code>s</code>.</p>
</td>
<td class="code leading">
<pre class="chroma">
<span class="k">for</span> <span class="nx">i</span> <span class="o">:=</span> <span class="mi">0</span><span class="p">;</span> <span class="nx">i</span> <span class="p">&lt;</span> <span class="nb">len</span><span class="p">(</span><span class="nx">s</span><span class="p">);</span> <span class="nx">i</span><span class="o">++</span> <span class="p">{</span>
<span class="nx">fmt</span><span class="p">.</span><span class="nf">Printf</span><span class="p">(</span><span class="s">&#34;%x &#34;</span><span class="p">,</span> <span class="nx">s</span><span class="p">[</span><span class="nx">i</span><span class="p">])</span>
<span class="p">}</span>
<span class="nx">fmt</span><span class="p">.</span><span class="nf">Println</span><span class="p">()</span>
</pre>
</td>
</tr>
<tr>
<td class="docs">
<p>To count how many <em>runes</em> are in a string, we can use
the <code>utf8</code> package. Note that the run-time of
<code>RuneCountInString</code> depends on the size of the string,
because it has to decode each UTF-8 rune sequentially.
Each Thai code point here is encoded as multiple bytes
in UTF-8, so the result of this count may be surprising.</p>
</td>
<td class="code leading">
<pre class="chroma">
<span class="nx">fmt</span><span class="p">.</span><span class="nf">Println</span><span class="p">(</span><span class="s">&#34;Rune count:&#34;</span><span class="p">,</span> <span class="nx">utf8</span><span class="p">.</span><span class="nf">RuneCountInString</span><span class="p">(</span><span class="nx">s</span><span class="p">))</span>
</pre>
</td>
</tr>
<tr>
<td class="docs">
<p>A <code>range</code> loop handles strings specially and decodes
each <code>rune</code> along with its offset in the string.</p>
</td>
<td class="code leading">
<pre class="chroma">
<span class="k">for</span> <span class="nx">idx</span><span class="p">,</span> <span class="nx">runeValue</span> <span class="o">:=</span> <span class="k">range</span> <span class="nx">s</span> <span class="p">{</span>
<span class="nx">fmt</span><span class="p">.</span><span class="nf">Printf</span><span class="p">(</span><span class="s">&#34;%#U starts at %d\n&#34;</span><span class="p">,</span> <span class="nx">runeValue</span><span class="p">,</span> <span class="nx">idx</span><span class="p">)</span>
<span class="p">}</span>
</pre>
</td>
</tr>
<tr>
<td class="docs">
<p>We can achieve the same iteration by using the
<code>utf8.DecodeRuneInString</code> function explicitly.</p>
</td>
<td class="code leading">
<pre class="chroma">
<span class="nx">fmt</span><span class="p">.</span><span class="nf">Println</span><span class="p">(</span><span class="s">&#34;\nUsing DecodeRuneInString&#34;</span><span class="p">)</span>
<span class="k">for</span> <span class="nx">i</span><span class="p">,</span> <span class="nx">w</span> <span class="o">:=</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">;</span> <span class="nx">i</span> <span class="p">&lt;</span> <span class="nb">len</span><span class="p">(</span><span class="nx">s</span><span class="p">);</span> <span class="nx">i</span> <span class="o">+=</span> <span class="nx">w</span> <span class="p">{</span>
<span class="nx">runeValue</span><span class="p">,</span> <span class="nx">width</span> <span class="o">:=</span> <span class="nx">utf8</span><span class="p">.</span><span class="nf">DecodeRuneInString</span><span class="p">(</span><span class="nx">s</span><span class="p">[</span><span class="nx">i</span><span class="p">:])</span>
<span class="nx">fmt</span><span class="p">.</span><span class="nf">Printf</span><span class="p">(</span><span class="s">&#34;%#U starts at %d\n&#34;</span><span class="p">,</span> <span class="nx">runeValue</span><span class="p">,</span> <span class="nx">i</span><span class="p">)</span>
<span class="nx">w</span> <span class="p">=</span> <span class="nx">width</span>
</pre>
</td>
</tr>
<tr>
<td class="docs">
<p>This demonstrates passing a <code>rune</code> value to a function.</p>
</td>
<td class="code leading">
<pre class="chroma">
<span class="nf">examineRune</span><span class="p">(</span><span class="nx">runeValue</span><span class="p">)</span>
<span class="p">}</span>
<span class="p">}</span>
</pre>
</td>
</tr>
<tr>
<td class="docs">
</td>
<td class="code leading">
<pre class="chroma"><span class="kd">func</span> <span class="nf">examineRune</span><span class="p">(</span><span class="nx">r</span> <span class="kt">rune</span><span class="p">)</span> <span class="p">{</span>
</pre>
</td>
</tr>
<tr>
<td class="docs">
<p>Values enclosed in single quotes are <em>rune literals</em>. We
can compare a <code>rune</code> value to a rune literal directly.</p>
</td>
<td class="code">
<pre class="chroma">
<span class="k">if</span> <span class="nx">r</span> <span class="o">==</span> <span class="sc">&#39;t&#39;</span> <span class="p">{</span>
<span class="nx">fmt</span><span class="p">.</span><span class="nf">Println</span><span class="p">(</span><span class="s">&#34;found tee&#34;</span><span class="p">)</span>
<span class="p">}</span> <span class="k">else</span> <span class="k">if</span> <span class="nx">r</span> <span class="o">==</span> <span class="sc">&#39;ส&#39;</span> <span class="p">{</span>
<span class="nx">fmt</span><span class="p">.</span><span class="nf">Println</span><span class="p">(</span><span class="s">&#34;found so sua&#34;</span><span class="p">)</span>
<span class="p">}</span>
<span class="p">}</span>
</pre>
</td>
</tr>
</table>
<table>
<tr>
<td class="docs">
</td>
<td class="code leading">
<pre class="chroma"><span class="gp">$</span> go run strings-and-runes.go
<span class="go">Len: 18
</span><span class="go">e0 b8 aa e0 b8 a7 e0 b8 b1 e0 b8 aa e0 b8 94 e0 b8 b5
</span><span class="go">Rune count: 6
</span><span class="go">U+0E2A &#39;ส&#39; starts at 0
</span><span class="go">U+0E27 &#39;ว&#39; starts at 3
</span><span class="go">U+0E31 &#39;ั&#39; starts at 6
</span><span class="go">U+0E2A &#39;ส&#39; starts at 9
</span><span class="go">U+0E14 &#39;ด&#39; starts at 12
</span><span class="go">U+0E35 &#39;ี&#39; starts at 15</span></pre>
</td>
</tr>
<tr>
<td class="docs">
</td>
<td class="code">
<pre class="chroma"><span class="go">Using DecodeRuneInString
</span><span class="go">U+0E2A &#39;ส&#39; starts at 0
</span><span class="go">found so sua
</span><span class="go">U+0E27 &#39;ว&#39; starts at 3
</span><span class="go">U+0E31 &#39;ั&#39; starts at 6
</span><span class="go">U+0E2A &#39;ส&#39; starts at 9
</span><span class="go">found so sua
</span><span class="go">U+0E14 &#39;ด&#39; starts at 12
</span><span class="go">U+0E35 &#39;ี&#39; starts at 15</span></pre>
</td>
</tr>
</table>
<p class="next">
Next example: <a href="structs">Structs</a>.
</p>
<p class="footer">
by <a href="https://markmcgranaghan.com">Mark McGranaghan</a> and <a href="https://eli.thegreenplace.net">Eli Bendersky</a> | <a href="https://github.com/mmcgrana/gobyexample">source</a> | <a href="https://github.com/mmcgrana/gobyexample#license">license</a>
</p>
</div>
<script>
var codeLines = [];
codeLines.push('');codeLines.push('package main\u000A');codeLines.push('import (\u000A \"fmt\"\u000A \"unicode/utf8\"\u000A)\u000A');codeLines.push('func main() {\u000A');codeLines.push(' const s \u003D \"สวัสดี\"\u000A');codeLines.push(' fmt.Println(\"Len:\", len(s))\u000A');codeLines.push(' for i :\u003D 0; i \u003C len(s); i++ {\u000A fmt.Printf(\"%x \", s[i])\u000A }\u000A fmt.Println()\u000A');codeLines.push(' fmt.Println(\"Rune count:\", utf8.RuneCountInString(s))\u000A');codeLines.push(' for idx, runeValue :\u003D range s {\u000A fmt.Printf(\"%#U starts at %d\\n\", runeValue, idx)\u000A }\u000A');codeLines.push(' fmt.Println(\"\\nUsing DecodeRuneInString\")\u000A for i, w :\u003D 0, 0; i \u003C len(s); i +\u003D w {\u000A runeValue, width :\u003D utf8.DecodeRuneInString(s[i:])\u000A fmt.Printf(\"%#U starts at %d\\n\", runeValue, i)\u000A w \u003D width\u000A');codeLines.push(' examineRune(runeValue)\u000A }\u000A}\u000A');codeLines.push('func examineRune(r rune) {\u000A');codeLines.push(' if r \u003D\u003D \'t\' {\u000A fmt.Println(\"found tee\")\u000A } else if r \u003D\u003D \'ส\' {\u000A fmt.Println(\"found so sua\")\u000A }\u000A}\u000A');codeLines.push('');codeLines.push('');
</script>
<script src="site.js" async></script>
</body>
</html>

2
public/structs generated
View File

@ -9,7 +9,7 @@
onkeydown = (e) => {
if (e.key == "ArrowLeft") {
window.location.href = 'pointers';
window.location.href = 'strings-and-runes';
}