文档
Go 并发爬虫 — goroutine + channel 实战
目标
演示 Go 核心卖点:goroutine 轻量并发 + channel 通信。编写一个并发检查多个 URL 响应状态的工具。
完整代码
package main
import (
"fmt"
"net/http"
"sync"
"time"
)
// URLResult is the outcome of probing a single URL. A result carries either
// a StatusCode (request succeeded) or a non-nil Error (request failed).
type URLResult struct {
	// URL is the address that was probed.
	URL string
	// StatusCode is the HTTP status of the response; it stays 0 when Error is set.
	StatusCode int
	// Duration is the elapsed time measured around the request.
	Duration time.Duration
	// Error is the request failure, or nil when a response was received.
	Error error
}
// requestTimeout caps a whole HTTP exchange (connection, redirects, response).
// Without a cap, one unresponsive host would block its goroutine, and with it
// the program, for as long as the operating system keeps the connection alive.
const requestTimeout = 10 * time.Second

// httpClient is shared by all checkURL calls. http.DefaultClient (used by
// http.Get) has no timeout, so a dedicated client with one is used instead.
var httpClient = &http.Client{Timeout: requestTimeout}

// checkURL sends a GET request to url and delivers exactly one URLResult on
// resultCh: on failure (including timeout) the result carries Error, otherwise
// it carries the response StatusCode. Duration is the time until the request
// returned, i.e. until the response headers arrived or the request failed.
//
// The send on resultCh blocks if the channel has no free buffer slot and no
// receiver is ready.
func checkURL(url string, resultCh chan<- URLResult) {
	start := time.Now()
	resp, err := httpClient.Get(url)
	duration := time.Since(start)
	if err != nil {
		resultCh <- URLResult{URL: url, Duration: duration, Error: err}
		return
	}
	// Only the status line is needed; close the body to release the connection.
	defer resp.Body.Close()
	resultCh <- URLResult{URL: url, StatusCode: resp.StatusCode, Duration: duration}
}
// main probes a fixed list of URLs concurrently, prints one line per result
// as it arrives, and finishes with a success/failure summary.
func main() {
	targets := []string{
		"https://www.baidu.com",
		"https://www.github.com",
		"https://www.google.com",
		"https://httpbin.org/delay/2",
		"https://www.zhihu.com",
	}

	// One buffer slot per target: each worker sends exactly one result, so no
	// send ever blocks.
	results := make(chan URLResult, len(targets))

	// Launch one goroutine per URL.
	var workers sync.WaitGroup
	workers.Add(len(targets))
	for _, target := range targets {
		go func(u string) {
			defer workers.Done()
			checkURL(u, results)
		}(target)
	}

	// Close the channel once every worker has reported, which ends the
	// range loop below.
	go func() {
		workers.Wait()
		close(results)
	}()

	fmt.Println("🌐 URL 健康检查结果:")
	fmt.Println("-----------------------------")

	// Results are consumed in completion order, not in list order.
	var succeeded, failed int
	for r := range results {
		if r.Error == nil {
			fmt.Printf("✅ %-35s | %3d | %v\n", r.URL, r.StatusCode, r.Duration)
			succeeded++
			continue
		}
		fmt.Printf("❌ %-35s | 错误: %v\n", r.URL, r.Error)
		failed++
	}

	fmt.Println("-----------------------------")
	fmt.Printf("成功: %d 失败: %d 总计: %d\n", succeeded, failed, len(targets))
}
运行步骤(先将上面的完整代码保存为 main.go,再在同一目录下执行)
go mod init healthcheck
go run main.go
预期输出(示例:结果按各请求完成的先后顺序打印,实际顺序、耗时和错误信息取决于网络环境)
🌐 URL 健康检查结果:
-----------------------------
✅ https://www.baidu.com | 200 | 156ms
✅ https://www.github.com | 200 | 890ms
❌ https://www.google.com | 错误: context deadline exceeded
✅ https://httpbin.org/delay/2 | 200 | 2.1s
✅ https://www.zhihu.com | 200 | 420ms
-----------------------------
成功: 4 失败: 1 总计: 5
关键知识点
- go func() 创建 goroutine,初始栈开销仅约 2KB
- chan 安全地在 goroutine 间传递数据
- sync.WaitGroup 等待所有 goroutine 完成
- 所有 URL 并发检测,总耗时约等于最慢的单个请求