文档
Elasticsearch Hello World:全文搜索入门
目标
创建索引、插入文档、执行全文搜索和聚合分析,理解 ES 的 RESTful 搜索范式。
完整代码(curl)
# Step 1: create the "library" index with an explicit mapping.
#   text    -> analyzed (tokenized) fields used for full-text search
#   keyword -> fields kept verbatim for exact matching and aggregations
curl --request PUT "http://localhost:9200/library" \
     --header "Content-Type: application/json" \
     --data '{
  "mappings": {
    "properties": {
      "title":       {"type": "text", "analyzer": "standard"},
      "author":      {"type": "keyword"},
      "year":        {"type": "integer"},
      "pages":       {"type": "integer"},
      "tags":        {"type": "keyword"},
      "description": {"type": "text"}
    }
  }
}'
# Step 2: index four books in a single request through the _bulk API.
# The body is NDJSON: each action line ({"index": ...}) is followed by the
# document source on the next line, and the payload ends with a newline.
# refresh=wait_for makes the request return only after a refresh has made the
# new documents visible to search, so the queries in the following steps see
# all four books even when the commands are run back-to-back as one script.
curl -X POST "http://localhost:9200/library/_bulk?refresh=wait_for" -H "Content-Type: application/json" -d '
{"index": {"_id": "1"}}
{"title": "深入理解计算机系统", "author": "Randal E. Bryant", "year": 2015, "pages": 1080, "tags": ["cs", "systems"], "description": "从程序员视角理解计算机系统"}
{"index": {"_id": "2"}}
{"title": "算法导论", "author": "Thomas H. Cormen", "year": 2009, "pages": 1312, "tags": ["algorithms", "textbook"], "description": "全面介绍算法的经典教材"}
{"index": {"_id": "3"}}
{"title": "计算机网络:自顶向下方法", "author": "James F. Kurose", "year": 2017, "pages": 864, "tags": ["networking", "textbook"], "description": "计算机网络入门经典"}
{"index": {"_id": "4"}}
{"title": "操作系统概念", "author": "Abraham Silberschatz", "year": 2018, "pages": 976, "tags": ["os", "textbook"], "description": "操作系统原理经典教材"}
'
# Step 3: full-text search on the title field with a match query.
# The query text is analyzed with the same analyzer as the field, and a
# document matches when it contains any of the resulting terms.
# NOTE(review): the "standard" analyzer declared in step 1 tokenizes Chinese
# text one character at a time, so titles sharing any single character with
# the query will also match -- verify with the _analyze API if this matters.
curl --request GET "http://localhost:9200/library/_search" \
     --header "Content-Type: application/json" \
     --data '{
  "query": {
    "match": {
      "title": "计算机 系统"
    }
  }
}'
# Step 4: search several fields at once with multi_match.
# "title^2" boosts matches in the title so they weigh twice as much as
# matches in the description.
# The query terms are Chinese because the indexed title/description values
# are Chinese; the English words "algorithm textbook" do not occur in either
# searched field (they only resemble values of the unsearched "tags" field)
# and therefore produced no hits.
curl -X GET "http://localhost:9200/library/_search" -H "Content-Type: application/json" -d '{
  "query": {
    "multi_match": {
      "query": "算法 教材",
      "fields": ["title^2", "description"]
    }
  }
}'
# Step 5: combine a scored full-text clause with a non-scoring filter.
#   must   -> the description has to match the query text; contributes to _score
#   filter -> restricts results to year >= 2015 without affecting _score
curl --request GET "http://localhost:9200/library/_search" \
     --header "Content-Type: application/json" \
     --data '{
  "query": {
    "bool": {
      "must": {
        "match": {"description": "经典"}
      },
      "filter": {
        "range": {"year": {"gte": 2015}}
      }
    }
  }
}'
# Step 6: aggregation -- count documents per tag.
# "size": 0 suppresses the hit list so that only the aggregation buckets are
# returned; the terms aggregation runs on the keyword field "tags".
curl --request GET "http://localhost:9200/library/_search" \
     --header "Content-Type: application/json" \
     --data '{
  "size": 0,
  "aggs": {
    "by_tag": {
      "terms": {"field": "tags"}
    }
  }
}'
# Step 7: ask Elasticsearch to return highlighted snippets.
# Each hit gains a "highlight" section with fragments of the description in
# which the matched terms are wrapped in highlight tags.
curl --request GET "http://localhost:9200/library/_search" \
     --header "Content-Type: application/json" \
     --data '{
  "query": {
    "match": {"description": "经典"}
  },
  "highlight": {
    "fields": {
      "description": {}
    }
  }
}'
Python 版本
# pip install elasticsearch
from elasticsearch import Elasticsearch
# Client for the local node that the curl examples also target.
es = Elasticsearch(['http://localhost:9200'])

# Create the index with an explicit mapping.
# ignore=400 keeps the script re-runnable: Elasticsearch answers HTTP 400
# when the index already exists.
# NOTE(review): ignore=400 hides every 400 from this call (e.g. a malformed
# mapping too), and 8.x clients prefer es.options(ignore_status=400) --
# confirm against the installed client version.
es.indices.create(index='library', body={
    'mappings': {
        'properties': {
            'title': {'type': 'text'},
            'author': {'type': 'keyword'},
            'year': {'type': 'integer'},
            'tags': {'type': 'keyword'},
        }
    }
}, ignore=400)

# Index a single document.
doc = {
    'title': '深入理解计算机系统',
    'author': 'Randal E. Bryant',
    'year': 2015,
    'tags': ['cs', 'systems'],
}
# refresh='wait_for' blocks until a refresh has made the document searchable.
# Without it the search below can run before the next periodic refresh and
# return no hits, because Elasticsearch search is only near-real-time.
es.index(index='library', id=1, body=doc, refresh='wait_for')

# Full-text search on the title, requesting highlighted fragments as well.
result = es.search(index='library', body={
    'query': {'match': {'title': '计算机'}},
    'highlight': {'fields': {'title': {}}},
})
for hit in result['hits']['hits']:
    print(f"得分 {hit['_score']:.2f}: {hit['_source']['title']}")
    # The highlight section is only present when a fragment was produced;
    # its value is a list of fragments, printed here as-is.
    if 'highlight' in hit:
        print(f" 高亮: {hit['highlight']['title']}")

# Aggregation: count documents per tag; size=0 skips the hit list.
agg_result = es.search(index='library', body={
    'size': 0,
    'aggs': {'popular_tags': {'terms': {'field': 'tags'}}},
})
for bucket in agg_result['aggregations']['popular_tags']['buckets']:
    print(f"{bucket['key']}: {bucket['doc_count']} 本")
预期输出
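// 全文搜索结果(节选;_score 与排序仅为示意值)
// 注意:standard 分析器会把中文按单字切分,match 查询默认只要命中任意一个字即算匹配,
// 因此《算法导论》(含"算")和《操作系统概念》(含"系""统")也会出现在结果中,total 为 4。
{
"hits": {
"total": {"value": 4},
"hits": [
{"_source": {"title": "深入理解计算机系统"}, "_score": 1.2},
{"_source": {"title": "计算机网络:自顶向下方法"}, "_score": 0.8}
]
}
}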
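// 聚合结果(对应 curl 版本第 6 步;桶按 doc_count 降序排列,计数相同时按 key 升序)
{
"aggregations": {
"by_tag": {
"buckets": [
{"key": "textbook", "doc_count": 3},
{"key": "algorithms", "doc_count": 1},
{"key": "cs", "doc_count": 1},
{"key": "networking", "doc_count": 1},
{"key": "os", "doc_count": 1},
{"key": "systems", "doc_count": 1}
]
}
}
}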
关键点
- text 类型字段会被分词,适合全文搜索;keyword 类型用于精确匹配和聚合
- _bulk API 高效批量导入
- bool query 组合 must/should/must_not/filter
- aggregations 实现分组统计,类似 SQL 的 GROUP BY
- 搜索结果按 _score(相关度评分)降序排列