安装
docker network create elastic
docker pull docker.elastic.co/elasticsearch/elasticsearch:8.10.4
# Raise the kernel mmap count limit that Elasticsearch requires (Linux hosts only).
vim /etc/sysctl.conf # add the line: vm.max_map_count=262144
# Reload /etc/sysctl.conf so the new value takes effect (a reboot also works).
sudo sysctl -p
# Verify the active value; it should print: vm.max_map_count = 262144
sysctl vm.max_map_count
docker run --name es01 --net elastic -p 9200:9200 -it -m 1GB docker.elastic.co/elasticsearch/elasticsearch:8.10.4
成功后,终端会打印 elastic 用户的初始密码和 Kibana 的 enrollment token(原文此处为一张截图):
 
请保存好密码;如果没有记下,可以用下面的命令重置 elastic 用户的密码,并重新生成 Kibana 的 enrollment token:
docker exec -it es01 /usr/share/elasticsearch/bin/elasticsearch-reset-password -u elastic
docker exec -it es01 /usr/share/elasticsearch/bin/elasticsearch-create-enrollment-token -s kibana
将容器内的 CA 证书复制到当前目录,供客户端通过 SSL/TLS 访问:
docker cp es01:/usr/share/elasticsearch/config/certs/http_ca.crt .
使用
connect
from elasticsearch import Elasticsearch
# Addresses of the Elasticsearch nodes the client may connect to.
# NOTE(review): the `docker run` command in the install section publishes only
# port 9200; keep the 9201/9202 entries only if those nodes are actually running.
NODES = [
    "https://localhost:9200",
    "https://localhost:9201",
    "https://localhost:9202",
]
# Password for the 'elastic' user generated by Elasticsearch
ELASTIC_PASSWORD = "<password>"
# Create the client instance
client = Elasticsearch(
    NODES,
    # CA certificate used to verify the server's TLS certificate
    # (the http_ca.crt file copied out of the container).
    ca_certs="/path/to/http_ca.crt",
    # Trailing comma so any alternative below can be uncommented safely.
    basic_auth=("elastic", ELASTIC_PASSWORD),
    # Several other authentication / verification options are supported:
    # api_key=("api_key.id", "api_key.api_key"),
    # bearer_auth="token-value",
    # ssl_assert_fingerprint=CERT_FINGERPRINT,
)
# Successful response!
client.info()
index
# Index definition: analysis settings plus the field mapping for the questions.
mapping = {
    "settings": {
        "analysis": {
            "analyzer": {
                "latex_analyzer": {
                    "type": "standard",  # build on the standard analyzer
                    "stopwords": "_none_"  # do not remove any stop words
                }
            }
        }
    },
    "mappings": {
        "properties": {
            "question": {
                "type": "text",
                "analyzer": "latex_analyzer"  # use the analyzer defined above
            }
        }
    }
}
# Create the index. Uses `client` (the instance created in the connect section;
# `es` is never defined in this file) and passes the settings and mappings as
# explicit keyword arguments instead of the catch-all `body` parameter.
client.indices.create(
    index="questions_index",
    settings=mapping["settings"],
    mappings=mapping["mappings"],
)
insert
from elasticsearch import Elasticsearch, helpers
# Select the "question" column and turn it back into a one-column frame so each
# row can later be serialised as a {"question": ...} document.
# NOTE(review): assumes `df` is a pandas DataFrame loaded earlier; it is not
# defined in the part of the file shown here.
insert_df = df["question"].to_frame()
# Preview the first rows; the value is only displayed in an interactive
# session or notebook cell.
insert_df.head()
def doc_generator(df, index_name):
    """Lazily yield one bulk-index action per row of ``df``.

    Each action targets ``index_name``, uses the row's index label as the
    document ``_id`` and the row's column/value mapping as ``_source``.
    """
    for row_label, row in df.iterrows():
        action = {
            "_index": index_name,
            "_id": row_label,
            "_source": row.to_dict(),
        }
        yield action
# Stream every row of insert_df into the index through the bulk helper.
# Uses `client`, the Elasticsearch instance created in the connect section
# (`es` is never defined in this file).
helpers.bulk(client, doc_generator(insert_df, 'questions_index'))
query
# Simple full-text match query against the "question" field.
search_query = {
    "query": {
        "match": {
            "question": "数轴上A、B两点所表示的有理数的和是"
        }
    }
}
# Uses `client` (the instance created in the connect section; `es` is never
# defined in this file) and passes the query through the explicit `query`
# keyword instead of the catch-all `body` parameter.
response = client.search(index="questions_index", query=search_query["query"])
# Print the question text of every returned hit.
for hit in response['hits']['hits']:
    print(hit['_source']['question'])
import ujson
# Original query, kept as a JSON string. It ORs three multi_match variants
# (cross_fields, phrase, phrase_prefix) and requires at least one to match.
# NOTE(review): the mapping created earlier in this file defines only the
# `question` field; the `question.raw` / `.search` / `.autosuggest` / `.english`
# sub-fields must exist in the index mapping to contribute - verify.
query_string = '''{
    "query": {
        "bool": {
            "must": [
                {
                    "bool": {
                        "must": [
                            {
                                "bool": {
                                    "should": [
                                        {
                                            "multi_match": {
                                                "query": "数轴上A、B两点所表示的有理数的和是",
                                                "fields": ["question^3", "question.raw^3", "question.search^1", "question.autosuggest^1", "question.english^1"],
                                                "type": "cross_fields",
                                                "operator": "and"
                                            }
                                        },
                                        {
                                            "multi_match": {
                                                "query": "数轴上A、B两点所表示的有理数的和是",
                                                "fields": ["question^3", "question.raw^3", "question.search^1", "question.autosuggest^1", "question.english^1"],
                                                "type": "phrase",
                                                "operator": "and"
                                            }
                                        },
                                        {
                                            "multi_match": {
                                                "query": "数轴上A、B两点所表示的有理数的和是",
                                                "fields": ["question^3", "question.raw^3", "question.english^1"],
                                                "type": "phrase_prefix",
                                                "operator": "and"
                                            }
                                        }
                                    ],
                                    "minimum_should_match": "1"
                                }
                            }
                        ]
                    }
                }
            ]
        }
    }
}'''
# Parse the JSON text into a dict so the query clause can be passed on.
query_dict = ujson.loads(query_string)
# Uses `client` (the instance created in the connect section; `es` is never
# defined in this file) and the explicit `query` keyword instead of `body`.
response = client.search(index='questions_index', query=query_dict["query"])
# Print the full stored document of every returned hit.
for hit in response['hits']['hits']:
    print(hit['_source'])









