使用Go、Elasticsearch 建立全文檢索引擎

準備工作

curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh

// 使用Docker
docker pull nshou/elasticsearch-kibana
docker run -it -d -p 9200:9200 -p 5601:5601 nshou/elasticsearch-kibana
  • 專案目錄下建立 search_api 目錄(位於 $GOPATH/src/專案名稱/ 下)

search_api 目錄下

  • 執行 dep init
  • 建立Dockerfile
# Build and run the service on top of the official Go 1.10.0 image.
FROM golang:1.10.0

# Create a password-less user named "api" (empty GECOS fields) and switch to
# it, so the following instructions and the container process do not run as root.
RUN adduser --disabled-password --gecos '' api
USER api

# Place the build context under /go/src/app.
WORKDIR /go/src/app
COPY . .

# Compile and install every package found under the working directory.
RUN go install -v ./...

# Start the installed "app" binary when the container starts.
CMD [ "app" ]
  • 撰寫search.go
package main

import (
    "encoding/json"
    "fmt"
    "log"
    "net/http"
    "strconv"
    "time"

    "github.com/gin-gonic/gin"
    "github.com/olivere/elastic"
    "github.com/teris-io/shortid"
)

// Elasticsearch coordinates shared by the HTTP handlers in this file.
const (
    // elasticIndexName is the index that documents are written to and searched in.
    elasticIndexName = "documents"
    // elasticTypeName is the mapping type passed to the bulk indexing request.
    elasticTypeName  = "document"
)

// Document is the record sent to Elasticsearch by createDocumentsEndpoint.
// ID and CreatedAt are filled in by the server; Title and Content are copied
// from the incoming DocumentRequest.
type Document struct {
    ID        string    `json:"id"`
    Title     string    `json:"title"`
    CreatedAt time.Time `json:"created_at"`
    Content   string    `json:"content"`
}

// DocumentRequest is one element of the JSON array accepted by
// POST /documents.
type DocumentRequest struct {
    Title   string `json:"title"`
    Content string `json:"content"`
}

// DocumentResponse is a single search hit as returned to the client. It is
// decoded from the hit's stored source and carries the same fields as
// Document except the ID.
type DocumentResponse struct {
    Title     string    `json:"title"`
    CreatedAt time.Time `json:"created_at"`
    Content   string    `json:"content"`
}

// SearchResponse is the JSON body returned by GET /search.
type SearchResponse struct {
    // Time is the search duration in milliseconds as reported by
    // Elasticsearch, formatted as a decimal string.
    Time      string             `json:"time"`
    // Hits is the total number of matching documents, formatted as a
    // decimal string.
    Hits      string             `json:"hits"`
    // Documents holds the hits of the requested page.
    Documents []DocumentResponse `json:"documents"`
}

var (
    // elasticClient is the Elasticsearch client used by every handler. It is
    // assigned in main before the HTTP server starts.
    elasticClient *elastic.Client
)

// main connects to Elasticsearch, retrying until the connection succeeds,
// and then serves the HTTP API on port 8080.
func main() {
    var err error

    // Elasticsearch may still be starting up, so keep trying until a client
    // can be created, pausing three seconds between attempts.
    for {
        elasticClient, err = elastic.NewClient(
            elastic.SetURL("http://elasticsearch:9200"),
            elastic.SetSniff(false),
        )
        if err == nil {
            break
        }
        log.Println(err)
        time.Sleep(3 * time.Second)
    }

    // Register the routes and block serving requests.
    router := gin.Default()
    router.POST("/documents", createDocumentsEndpoint)
    router.GET("/search", searchEndpoint)

    err = router.Run(":8080")
    if err != nil {
        log.Fatal(err)
    }
}

// createDocumentsEndpoint handles POST /documents.
//
// The request body must be a non-empty JSON array of DocumentRequest
// objects. Each element is given a generated ID and the current UTC time and
// all of them are indexed with a single bulk request.
//
// Responses:
//   - 200 with an empty body when every document was indexed;
//   - 400 when the body cannot be parsed or contains no documents;
//   - 500 when an ID cannot be generated, the bulk request fails, or
//     Elasticsearch reports a failure for at least one document.
func createDocumentsEndpoint(c *gin.Context) {
    // ShouldBindJSON only reports the error. BindJSON would abort the
    // request with its own 400 first, so the JSON error body written below
    // would arrive after the headers had already been sent.
    var docs []DocumentRequest
    if err := c.ShouldBindJSON(&docs); err != nil {
        errorResponse(c, http.StatusBadRequest, "Malformed request body")
        return
    }
    // A bulk request without any action is refused by the client library;
    // report the empty input as a client error instead of a 500.
    if len(docs) == 0 {
        errorResponse(c, http.StatusBadRequest, "No documents specified")
        return
    }

    // Insert documents in bulk
    bulk := elasticClient.
        Bulk().
        Index(elasticIndexName).
        Type(elasticTypeName)
    for _, d := range docs {
        // Generate returns an error where MustGenerate would panic inside
        // the request handler.
        id, err := shortid.Generate()
        if err != nil {
            log.Println(err)
            errorResponse(c, http.StatusInternalServerError, "Failed to create documents")
            return
        }
        doc := Document{
            ID:        id,
            Title:     d.Title,
            CreatedAt: time.Now().UTC(),
            Content:   d.Content,
        }
        bulk.Add(elastic.NewBulkIndexRequest().Id(doc.ID).Doc(doc))
    }

    res, err := bulk.Do(c.Request.Context())
    if err != nil {
        log.Println(err)
        errorResponse(c, http.StatusInternalServerError, "Failed to create documents")
        return
    }
    // The bulk call itself can succeed while individual items are rejected;
    // those failures are only visible in the response.
    if failed := res.Failed(); len(failed) > 0 {
        log.Printf("bulk insert: %d of %d documents failed", len(failed), len(docs))
        errorResponse(c, http.StatusInternalServerError, "Failed to create documents")
        return
    }
    c.Status(http.StatusOK)
}

// searchEndpoint handles GET /search.
//
// Query parameters:
//   - query: the search text (required);
//   - skip:  offset of the first hit to return (default 0);
//   - take:  maximum number of hits to return (default 10).
//
// The text is matched against the "title" and "content" fields. The
// response is a SearchResponse; hits whose stored source is missing or
// cannot be decoded are logged and left out of the result.
//
// Responses: 200 on success, 400 when query is empty, 500 when the search
// request fails.
func searchEndpoint(c *gin.Context) {
    // Parse request
    query := c.Query("query")
    if query == "" {
        errorResponse(c, http.StatusBadRequest, "Query not specified")
        return
    }
    skip := nonNegativeQueryInt(c, "skip", 0)
    take := nonNegativeQueryInt(c, "take", 10)

    // Perform search
    esQuery := elastic.NewMultiMatchQuery(query, "title", "content").
        Fuzziness("2").
        MinimumShouldMatch("2")
    result, err := elasticClient.Search().
        Index(elasticIndexName).
        Query(esQuery).
        From(skip).Size(take).
        Do(c.Request.Context())
    if err != nil {
        log.Println(err)
        errorResponse(c, http.StatusInternalServerError, "Something went wrong")
        return
    }

    res := SearchResponse{
        Time: fmt.Sprintf("%d", result.TookInMillis),
        Hits: "0",
        // Start from an empty, non-nil slice so that "documents" is encoded
        // as [] rather than null when nothing matches.
        Documents: make([]DocumentResponse, 0),
    }
    // Guard against a result without a hits section before dereferencing it.
    if result.Hits != nil {
        res.Hits = fmt.Sprintf("%d", result.Hits.TotalHits)
        // Transform search results before returning them
        docs := make([]DocumentResponse, 0, len(result.Hits.Hits))
        for _, hit := range result.Hits.Hits {
            if hit == nil || hit.Source == nil {
                continue
            }
            var doc DocumentResponse
            if err := json.Unmarshal(*hit.Source, &doc); err != nil {
                log.Println(err)
                continue
            }
            docs = append(docs, doc)
        }
        res.Documents = docs
    }
    c.JSON(http.StatusOK, res)
}

// nonNegativeQueryInt returns the query parameter name as an int. It returns
// def when the parameter is absent, is not a number, or is negative, so that
// invalid paging values are never forwarded to Elasticsearch.
func nonNegativeQueryInt(c *gin.Context, name string, def int) int {
    if i, err := strconv.Atoi(c.Query(name)); err == nil && i >= 0 {
        return i
    }
    return def
}

// errorResponse writes a JSON object of the form {"error": err} to the
// client with the given HTTP status code.
func errorResponse(c *gin.Context, code int, err string) {
    body := gin.H{"error": err}
    c.JSON(code, body)
}
  • dep ensure

Docker-compose

  • 回到專案目錄下,建立 docker-compose.yaml
version: '3.5'
services:
  # Go search API, published on host port 8080.
  search_api:
    container_name: 'search_api'
    # Must point at the directory that holds the Dockerfile and search.go;
    # the tutorial creates it as "search_api" (underscore, not hyphen).
    build: './search_api'
    restart: 'on-failure'
    ports:
      - '8080:8080'
    # Start the Elasticsearch container before this one.
    depends_on:
      - elasticsearch
  # Elasticsearch node; the API reaches it by its service name on port 9200.
  elasticsearch:
    container_name: 'elasticsearch'
    image: 'docker.elastic.co/elasticsearch/elasticsearch:6.2.3'
    ports:
      - '9200:9200'

執行

docker-compose up -d --build
// 使用測試資料
curl -X POST http://localhost:8080/documents -d @fake-data.json -H "Content-Type: application/json"

測試

$ curl "http://localhost:8080/search?query=exercitation+est+officia"
{
  "time": "42",
  "hits": "43",
  "documents": [{
      "title": "Exercitation est officia fugiat labore deserunt est id voluptate magna.",
      "created_at": "2018-03-21T15:22:48.7830606Z",
      "content": "..."
    },
    // ...
  ]
}

參考資料