diff --git a/.github/workflows/docsearch.yml b/.github/workflows/docsearch.yml
new file mode 100644
--- /dev/null
+++ b/.github/workflows/docsearch.yml
@@ -0,0 +1,32 @@
+# Runs the Typesense DocSearch scraper with docs/docsearch-scraper-config.json
+# on every push to master.
+name: Index docs to Typesense
+
+on:
+  push:
+    branches:
+      - master
+
+jobs:
+  index_docs:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Index docs to Typesense
+        env:
+          # Prefer the masked secret; fall back to the repository variable so
+          # setups that only define vars.TYPESENSE_API_KEY keep working.
+          TYPESENSE_API_KEY: ${{ secrets.TYPESENSE_API_KEY || vars.TYPESENSE_API_KEY }}
+          TYPESENSE_HOST: ${{ vars.TYPESENSE_HOST }}
+        run: |
+          # "-e NAME" without a value forwards NAME from this step's env into the container.
+          docker run \
+            -e TYPESENSE_API_KEY \
+            -e TYPESENSE_HOST \
+            -e TYPESENSE_PORT="443" \
+            -e TYPESENSE_PROTOCOL="https" \
+            -e CONFIG="$(jq -r tostring docs/docsearch-scraper-config.json)" \
+            typesense/docsearch-scraper
diff --git a/docs/docsearch-scraper-config.json b/docs/docsearch-scraper-config.json
new file mode 100644
--- /dev/null
+++ b/docs/docsearch-scraper-config.json
@@ -0,0 +1,49 @@
+{
+  "index_name": "typeorm-docs",
+  "start_urls": [
+    "https://typeorm.io/"
+  ],
+  "sitemap_urls": [
+    "https://typeorm.io/sitemap.xml"
+  ],
+  "allowed_domains": ["typeorm.io"],
+  "sitemap_alternate_links": true,
+  "stop_urls": [],
+  "selectors": {
+    "lvl0": {
+      "selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[last()]",
+      "type": "xpath",
+      "global": true,
+      "default_value": "Documentation"
+    },
+    "lvl1": "article h1, header h1",
+    "lvl2": "article h2",
+    "lvl3": "article h3",
+    "lvl4": "article h4",
+    "lvl5": "article h5, article td:first-child",
+    "lvl6": "article h6",
+    "text": "article p, article li, article td:last-child"
+  },
+  "strip_chars": " .,;:#",
+  "custom_settings": {
+    "separatorsToIndex": "_",
+    "attributesForFaceting": [
+      "language",
+      "version",
+      "type",
+      "docusaurus_tag"
+    ],
+    "attributesToRetrieve": [
+      "hierarchy",
+      "content",
+      "anchor",
+      "url",
+      "url_without_anchor",
+      "type"
+    ]
+  },
+  "conversation_id": [
+    "833762294"
+  ],
+  "nb_hits": 0
+}