Release Khoj version 0.9.0

Clean up search type usage in khoj server, tests and Readme
Fix triggering index update on khoj server from khoj.el
2026-05-13 21:41:41 +00:00 · 2023-07-18 19:59:27 -07:00 · 2023-07-18 19:57:55 -07:00 · 2023-07-18 19:57:54 -07:00 · 2023-07-18 19:57:54 -07:00 · 2023-07-18 17:54:11 -07:00
51 changed files with 1203 additions and 750 deletions
--- a/.github/workflows/build_desktop.yml
+++ b/.github/workflows/build_desktop.yml
@@ -0,0 +1,108 @@
+name: desktop_dev_build
+
+on:
+  push:
+    branches:
+      - master
+  workflow_dispatch:
+
+jobs:
+  publish_desktop_apps:
+    name: 🖥️ Publish Desktop Apps
+
+    strategy:
+      matrix:
+        include:
+        - os: ubuntu-latest
+          extension: deb
+        - os: macos-latest
+          extension: dmg
+        - os: windows-latest
+          extension: exe
+
+    runs-on: ${{ matrix.os }}
+    permissions:
+      contents: write
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.9'
+
+      - name: ⏬️ Install Dependencies
+        shell: bash
+        run: |
+          if [ "$RUNNER_OS" == "Linux" ]; then
+            sudo apt update -y && sudo apt install libegl1 libxcb-xinerama0 python3-tk -y  # refresh package index first; runner images can ship stale lists
+          fi
+          python -m pip install --upgrade pip
+          pip install pyinstaller
+
+      - name: ⬇️ Install Khoj App
+        run: |
+          pip install --upgrade .
+
+      - name: 📦 Package Khoj App
+        shell: bash
+        run: |
+          # Setup Environment for Reproducible Builds
+          export PYTHONHASHSEED=42
+          export SOURCE_DATE_EPOCH=$(git log -1 --pretty=%ct)
+
+          pyinstaller --noconfirm Khoj.spec
+          if [ "$RUNNER_OS" == "Windows" ]; then
+            mv dist/Khoj.exe dist/khoj_dev_amd64.exe
+          fi
+
+      - name: 💻 Create Mac App DMG
+        if: matrix.os == 'macos-latest'
+        run: |
+          # Install Mac DMG Creator
+          brew install create-dmg
+          # Copy app to separate dmg folder
+          mkdir -p dist/dmg && cp -r dist/Khoj.app dist/dmg
+          # Create disk image with the app
+          create-dmg \
+            --volname "Khoj" \
+            --volicon "src/khoj/interface/web/assets/icons/favicon.icns" \
+            --window-pos 200 120 \
+            --window-size 600 300 \
+            --icon-size 100 \
+            --icon "Khoj.app" 175 120 \
+            --hide-extension "Khoj.app" \
+            --app-drop-link 425 120 \
+            "dist/khoj_dev_amd64.dmg" \
+            "dist/dmg/"
+
+      - uses: ruby/setup-ruby@v1
+        if: matrix.os == 'ubuntu-latest'
+        with:
+          ruby-version: '3.0'
+
+      - name: 🐧 Create Debian Package
+        if: matrix.os == 'ubuntu-latest'
+        shell: bash
+        run: |
+          # Install Debian Packager
+          gem install fpm
+
+          # Copy app files into expected output directory structure
+          mkdir -p package/opt package/usr/share/applications package/usr/share/icons/hicolor/128x128/apps
+          cp -r dist/Khoj package/opt/Khoj
+          cp src/khoj/interface/web/assets/icons/favicon-128x128.png package/usr/share/icons/hicolor/128x128/apps/Khoj.png
+          cp Khoj.desktop package/usr/share/applications
+
+          # Fix permissions to be usable by non-root users
+          find package/usr/share -type f -exec chmod 644 -- {} +
+          chmod 755 package/opt/Khoj
+
+          # Package the app
+          fpm -C package -s dir -t deb -n Khoj -p dist/khoj_dev_amd64.deb
+
+      - uses: actions/upload-artifact@v3
+        with:
+          name: khoj_dev_amd64.${{matrix.extension}}
+          path: dist/khoj_dev_amd64.${{matrix.extension}}
+          retention-days: 1
--- a/.github/workflows/dockerize.yml
+++ b/.github/workflows/dockerize.yml
@@ -44,4 +44,4 @@ jobs:
          push: true
          tags: ghcr.io/${{ github.repository }}:${{ env.DOCKER_IMAGE_TAG }}
          build-args: |
-            PORT=8000
+            PORT=42110
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,11 +4,12 @@ LABEL org.opencontainers.image.source https://github.com/khoj-ai/khoj

 # Install System Dependencies
 RUN apt update -y && \
-    apt -y install python3-pip python3-pyqt6 git
+    apt -y install python3-pip git

-# Install Python Dependencies
-RUN pip install --upgrade pip && \
-    pip install git+https://github.com/khoj-ai/khoj.git
+# Install Application
+COPY . .
+RUN sed -i 's/dynamic = \["version"\]/version = "0.0.0"/' pyproject.toml && \
+    pip install --no-cache-dir .

 # Run the Application
 # There are more arguments required for the application to run,
--- a/README.md
+++ b/README.md
@@ -47,10 +47,10 @@
  - [Miscellaneous](#Miscellaneous-1)
 - [Development](#Development)
  - [Visualize Codebase](#visualize-codebase)
+  - [Create Release](#create-khoj-release)
  - [Setup](#Setup)
    - [Using Pip](#Using-Pip)
    - [Using Docker](#Using-Docker)
-    - [Using Conda](#Using-Conda)
  - [Validate](#Validate)
 - [Credits](#Credits)

@@ -169,9 +169,9 @@ The optional steps below allow using Khoj from within an existing application li
 - **Khoj via Emacs**
  - Run `M-x khoj <user-query>`
 - **Khoj via Web**
-  - Open <http://localhost:8000/> directly
+  - Open <http://localhost:42110/> directly
 - **Khoj via API**
-  - See the Khoj FastAPI [Swagger Docs](http://localhost:8000/docs), [ReDocs](http://localhost:8000/redocs)
+  - See the Khoj FastAPI [Swagger Docs](http://localhost:42110/docs), [ReDocs](http://localhost:42110/redocs)

 <details><summary>Query Filters</summary>

@@ -207,7 +207,7 @@ Use structured query syntax to filter the natural language search results
 - [Setup your OpenAI API key in Khoj](#set-your-openai-api-key-in-khoj)

 #### Use
-1. Open [/chat](http://localhost:8000/chat)[^2]
+1. Open [/chat](http://localhost:42110/chat)[^2]
 2. Type your queries and see response by Khoj from your notes

 #### Demo
@@ -256,7 +256,7 @@ pip install --upgrade --pre khoj-assistant
 - **Refer**: [Issue with Fix](https://github.com/khoj-ai/khoj/issues/82#issuecomment-1241890946) for more details

 #### Search starts giving wonky results
- **Fix**: Open [/api/update?force=true](http://localhost:8000/api/update?force=true)[^2] in browser to regenerate index from scratch
+- **Fix**: Open [/api/update?force=true](http://localhost:42110/api/update?force=true)[^2] in browser to regenerate index from scratch
 - **Note**: *This is a fix for when you percieve the search results have degraded. Not if you think they've always given wonky results*

 #### Khoj in Docker errors out with \"Killed\" in error message
@@ -270,7 +270,7 @@ pip install --upgrade --pre khoj-assistant
 ### Access Khoj on Mobile
 1. [Setup Khoj](#Setup) on your personal server. This can be any always-on machine, i.e an old computer, RaspberryPi(?) etc
 2. [Install](https://tailscale.com/kb/installation/) [Tailscale](tailscale.com/) on your personal server and phone
-3. Open the Khoj web interface of the server from your phone browser.<br /> It should be `http://tailscale-ip-of-server:8000` or `http://name-of-server:8000` if you've setup [MagicDNS](https://tailscale.com/kb/1081/magicdns/)
+3. Open the Khoj web interface of the server from your phone browser.<br /> It should be `http://tailscale-ip-of-server:42110` or `http://name-of-server:42110` if you've setup [MagicDNS](https://tailscale.com/kb/1081/magicdns/)
 4. Click the [Add to Homescreen](https://developer.mozilla.org/en-US/docs/Web/Progressive_web_apps/Add_to_home_screen) button
 5. Enjoy exploring your notes, documents and images from your phone!

@@ -311,7 +311,7 @@ pip install --upgrade --pre khoj-assistant
     model_directory: "~/.khoj/search/asymmetric/"
  ```

-  2. Regenerate your content index. For example, by opening [\<khoj-url\>/api/update?t=force](http://localhost:8000/api/update?t=force)
+  2. Regenerate your content index. For example, by opening [\<khoj-url\>/api/update?force=true](http://localhost:42110/api/update?force=true)

 ### Bootstrap Khoj Search for Offline Usage later

@@ -330,7 +330,7 @@ pip install --upgrade --pre khoj-assistant
 ### Set your OpenAI API key in Khoj
 If you want, Khoj can be configured to use OpenAI for search and chat.<br />
 Add your OpenAI API to Khoj by using either of the two options below:
- - Open your [Khoj settings](http://localhost:8000/config/processor/conversation), add your OpenAI API key, and click *Save*. Then go to your [Khoj settings](http://localhost:8000/config) and click `Configure`. This will refresh Khoj with your OpenAI API key.
+ - Open your [Khoj settings](http://localhost:42110/config/processor/conversation), add your OpenAI API key, and click *Save*. Then go to your [Khoj settings](http://localhost:42110/config) and click `Configure`. This will refresh Khoj with your OpenAI API key.
 - Set `openai-api-key` field under `processor.conversation` section in your `khoj.yml`[^1] to your [OpenAI API key](https://beta.openai.com/account/api-keys) and restart khoj:
    ```diff
    processor:
@@ -344,11 +344,11 @@ Add your OpenAI API to Khoj by using either of the two options below:
 **Warning**: *This will enable Khoj to send your query and note(s) to OpenAI for processing*

 ### GPT API
- The [chat](http://localhost:8000/api/chat), [answer](http://localhost:8000/api/beta/answer) and [search](http://localhost:8000/api/beta/search) API endpoints use [OpenAI API](https://openai.com/api/)
+- The [chat](http://localhost:42110/api/chat), [answer](http://localhost:42110/api/beta/answer) and [search](http://localhost:42110/api/beta/search) API endpoints use [OpenAI API](https://openai.com/api/)
 - They are disabled by default
 - To use them:
  1. [Setup your OpenAI API key in Khoj](#set-your-openai-api-key-in-khoj)
-  2. Interact with them from the [Khoj Swagger docs](http://locahost:8000/docs)[^2]
+  2. Interact with them from the [Khoj Swagger docs](http://localhost:42110/docs)[^2]

 ### Index Github Repository for Search, Chat
 The Khoj Github plugin can index issues, commit messages and markdown, org-mode and PDF files from any repositories you have access to. This allows you to chat or search with these repositories. Get answers, resolve issues or just explore a repo with the help of your AI personal assistant.
@@ -388,6 +388,19 @@ Note: *Khoj will ignore code files in the repository for now as the default AI m

 ![](https://github.com/khoj-ai/khoj/blob/master/docs/khoj_codebase_visualization_0.2.1.png?)

+### Create Khoj Release
+Follow the steps below to [release](https://github.com/debanjum/khoj/releases/) Khoj. This will create a stable release of Khoj on [Pypi](https://pypi.org/project/khoj-assistant/), [Melpa](https://stable.melpa.org/#/khoj) and [Obsidian](https://obsidian.md/plugins?id=khoj). It will also create desktop apps of Khoj and attach them to the latest release.
+
+1. Create and tag release commit by running the bump_version script. The release commit sets version number in required metadata files.
+  ```shell
+  ./scripts/bump_version.sh -c "<release_version>"
+  ```
+2. Push commit and then the tag to trigger the release workflow to create Release with auto generated release notes.
+  ```shell
+  git push origin master  # push release commit to khoj repository
+  git push origin <release_version>  # push release tag to khoj repository
+  ```
+3. [Optional] Update the Release Notes to highlight new features, fixes and updates
 ### Setup
 #### Using Pip
 ##### 1. Install
@@ -409,7 +422,7 @@ pip install -e .[dev]
   khoj -vv
   ```
 2. Configure Khoj
-   - **Via the Settings UI**: Add files, directories to index the [Khoj settings](http://localhost:8000/config) UI once Khoj has started up. Once you've saved all your settings, click `Configure`.
+   - **Via the Settings UI**: Add files, directories to index the [Khoj settings](http://localhost:42110/config) UI once Khoj has started up. Once you've saved all your settings, click `Configure`.
   - **Manually**:
     - Copy the `config/khoj_sample.yml` to `~/.khoj/khoj.yml`
     - Set `input-files` or `input-filter` in each relevant `content-type` section of `~/.khoj/khoj.yml`
@@ -445,39 +458,6 @@ docker-compose up -d
 docker-compose build --pull
 ```

-#### Using Conda
-##### 1. Install Dependencies
- [Install Conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html)
-
-##### 2. Install Khoj
-```shell
-git clone https://github.com/khoj-ai/khoj && cd khoj
-conda env create -f config/environment.yml
-conda activate khoj
-python3 -m pip install pyqt6  # As conda does not support pyqt6 yet
-```
-
-##### 3. Configure
- Copy the `config/khoj_sample.yml` to `~/.khoj/khoj.yml`
- Set `input-files` or `input-filter` in each relevant `content-type` section of `~/.khoj/khoj.yml`
-  - Set `input-directories` field in `image` `content-type` section
- Delete `content-type`, `processor` sub-sections irrelevant for your use-case
-
-##### 4. Run
-```shell
-python3 -m src.khoj.main -vv
-```
-  Load ML model, generate embeddings and expose API to query notes, images, documents etc specified in config YAML
-
-##### 5. Upgrade
-```shell
-cd khoj
-git pull origin master
-conda deactivate khoj
-conda env update -f config/environment.yml
-conda activate khoj
-```
-
 ### Validate
 #### Before Make Changes
 1. Install Git Hooks for Validation
@@ -522,4 +502,4 @@ conda activate khoj

 [^1]: Default Khoj config file @ `~/.khoj/khoj.yml`

-[^2]: Default Khoj url @ http://localhost:8000
+[^2]: Default Khoj url @ http://localhost:42110
--- a/config/environment.yml
+++ b/config/environment.yml
@@ -1,22 +0,0 @@
-name: khoj
-channels:
-  - conda-forge
-dependencies:
-  - python=3.8.*
-  - numpy=1.22.4
-  - pytorch=1.13.1
-  - torchvision=0.14.1
-  - transformers=4.21.0
-  - sentence-transformers=2.1.0
-  - fastapi=0.77.1
-  - uvicorn=0.17.6
-  - pyyaml=6.0
-  - pytest=7.1.2
-  - pillow=9.3.0
-  - openai=0.20.0
-  - pydantic=1.9.1
-  - jinja2=3.1.2
-  - aiofiles=0.8.0
-  - huggingface_hub=0.8.1
-  - dateparser=1.1.1
-  - schedule=1.1.0
--- a/config/environment_osx-arm64.yml
+++ b/config/environment_osx-arm64.yml
@@ -1,116 +0,0 @@
-name: khoj
-channels:
-  - conda-forge
-dependencies:
-  - aiofiles=0.8.0=pyhd8ed1ab_0
-  - asgiref=3.4.1=pyhd8ed1ab_0
-  - attrs=21.2.0=pyhd8ed1ab_0
-  - brotlipy=0.7.0=py39h5161555_1001
-  - ca-certificates=2022.6.15=h4653dfc_0
-  - certifi=2022.6.15=py39h2804cbe_0
-  - cffi=1.14.6=py39hda8b47f_0
-  - chardet=4.0.0=py39h2804cbe_1
-  - charset-normalizer=2.0.0=pyhd8ed1ab_0
-  - click=8.0.1=py39h2804cbe_0
-  - colorama=0.4.4=pyh9f0ad1d_0
-  - cryptography=3.4.7=py39h73257c9_0
-  - dataclasses=0.8=pyhc8e2a94_3
-  - dateparser=1.1.1=pyhd8ed1ab_0
-  - et_xmlfile=1.0.1=py_1001
-  - fastapi=0.68.2=pyhd8ed1ab_0
-  - filelock=3.0.12=pyh9f0ad1d_0
-  - freetype=2.10.4=h17b34a0_1
-  - future=0.18.2=py39h2804cbe_3
-  - h11=0.12.0=pyhd8ed1ab_0
-  - huggingface_hub=0.2.1=pyhd8ed1ab_0
-  - idna=3.1=pyhd3deb0d_0
-  - importlib-metadata=4.6.4=py39h2804cbe_0
-  - importlib_metadata=4.6.4=hd8ed1ab_0
-  - iniconfig=1.1.1=pyh9f0ad1d_0
-  - jbig=2.1=h3422bc3_2003
-  - jinja2=3.0.3=pyhd8ed1ab_0
-  - joblib=1.0.1=pyhd8ed1ab_0
-  - jpeg=9d=h27ca646_0
-  - lcms2=2.12=had6a04f_0
-  - lerc=2.2.1=h9f76cd9_0
-  - libblas=3.9.0=11_osxarm64_openblas
-  - libcblas=3.9.0=11_osxarm64_openblas
-  - libcxx=12.0.1=h168391b_0
-  - libdeflate=1.7=h27ca646_5
-  - libffi=3.3=h9f76cd9_2
-  - libgfortran=5.0.0.dev0=11_0_1_hf114ba7_23
-  - libgfortran5=11.0.1.dev0=hf114ba7_23
-  - liblapack=3.9.0=11_osxarm64_openblas
-  - libopenblas=0.3.17=openmp_h5dd58f0_1
-  - libpng=1.6.37=hf7e6567_2
-  - libprotobuf=3.16.0=hccf11d3_0
-  - libtiff=4.3.0=hc6122e1_1
-  - libwebp-base=1.2.1=h3422bc3_0
-  - llvm-openmp=12.0.1=hf3c4609_1
-  - lz4-c=1.9.3=hbdafb3b_1
-  - markupsafe=2.0.1=py39h5161555_1
-  - more-itertools=8.8.0=pyhd8ed1ab_0
-  - ncurses=6.2=h9aa5885_4
-  - ninja=1.10.2=h4d860bb_0
-  - nltk=3.6.2=pyhd8ed1ab_0
-  - numpy=1.21.4=py39h1f3b974_0
-  - olefile=0.46=pyh9f0ad1d_1
-  - openai=0.11.4=py39h2804cbe_0
-  - openjpeg=2.4.0=h062765e_1
-  - openpyxl=3.0.9=pyhd8ed1ab_0
-  - openssl=1.1.1q=ha287fd2_0
-  - packaging=21.0=pyhd8ed1ab_0
-  - pandas=1.3.4=py39h7f752ed_1
-  - pandas-stubs=1.2.0.38=py39h2804cbe_0
-  - pillow=8.3.2=py39ha74c66e_0
-  - pip=21.2.4=pyhd8ed1ab_0
-  - pluggy=0.13.1=py39h2804cbe_4
-  - py=1.10.0=pyhd3deb0d_0
-  - pycparser=2.20=pyh9f0ad1d_2
-  - pydantic=1.8.2=py39h5161555_2
-  - pyopenssl=20.0.1=pyhd8ed1ab_0
-  - pyparsing=2.4.7=pyh9f0ad1d_0
-  - pysocks=1.7.1=py39h2804cbe_3
-  - pytest=6.2.5=py39h2804cbe_1
-  - python=3.9.7=h54d631c_3_cpython
-  - python-dateutil=2.8.2=pyhd8ed1ab_0
-  - python-tzdata=2022.1=pyhd8ed1ab_0
-  - python_abi=3.9=2_cp39
-  - pytorch=1.9.0=cpu_py39he8fdc14_2
-  - pytorch-cpu=1.9.0=cpu_py39hd610c6a_2
-  - pytz=2021.3=pyhd8ed1ab_0
-  - pytz-deprecation-shim=0.1.0.post0=py39h2804cbe_2
-  - pyyaml=5.4.1=py39h5161555_1
-  - readline=8.1=hedafd6a_0
-  - regex=2021.8.21=py39h5161555_0
-  - requests=2.26.0=pyhd8ed1ab_0
-  - sacremoses=0.0.43=pyh9f0ad1d_0
-  - scikit-learn=0.24.2=py39hef7049f_1
-  - scipy=1.7.0=py39h5060c3b_0
-  - sentence-transformers=2.1.0=pyhd8ed1ab_0
-  - sentencepiece=0.1.95=py39h4d2d688_1
-  - setuptools=57.4.0=py39h2804cbe_0
-  - six=1.16.0=pyh6c4a22f_0
-  - sleef=3.5.1=h27ca646_1
-  - sqlite=3.36.0=h72a2b83_0
-  - starlette=0.14.2=pyhd8ed1ab_0
-  - threadpoolctl=2.2.0=pyh8a188c0_0
-  - tk=8.6.11=he1e0b03_0
-  - tokenizers=0.10.3=py39hab32027_1
-  - toml=0.10.2=pyhd8ed1ab_0
-  - torchvision=0.10.1=py39h0a40b5a_0_cpu
-  - tqdm=4.62.1=pyhd8ed1ab_0
-  - transformers=4.14.1=pyhd8ed1ab_0
-  - typing-extensions=3.10.0.0=hd8ed1ab_0
-  - typing_extensions=3.10.0.0=pyha770c72_0
-  - tzdata=2021a=he74cb21_1
-  - tzlocal=4.2=py39h2804cbe_1
-  - urllib3=1.26.6=pyhd8ed1ab_0
-  - uvicorn=0.16.0=py39h2804cbe_0
-  - wheel=0.37.0=pyhd8ed1ab_1
-  - xz=5.2.5=h642e427_1
-  - yaml=0.2.5=h642e427_0
-  - zipp=3.5.0=pyhd8ed1ab_0
-  - zlib=1.2.11=h31e879b_1009
-  - zstd=1.5.0=h861e0a7_0
-prefix: /opt/homebrew/Caskroom/miniforge/base/envs/khoj
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -7,7 +7,7 @@ services:
      # If changing the remote port (right hand side),
      #   change the port in the args in the build section,
      #   as well as the port in the command section to match
-      - "8000:8000"
+      - "42110:42110"
    working_dir: /app
    volumes:
      - .:/app
@@ -25,4 +25,4 @@ services:
      - ./tests/data/embeddings/:/data/embeddings/
      - ./tests/data/models/:/data/models/
    # Use 0.0.0.0 to explicitly set the host ip for the service on the container. https://pythonspeed.com/articles/docker-connection-refused/
-    command: --host="0.0.0.0" --port=8000 -c=config/khoj_docker.yml -vv
+    command: --host="0.0.0.0" --port=42110 -c=config/khoj_docker.yml -vv
--- a/manifest.json
+++ b/manifest.json
@@ -1,7 +1,7 @@
 {
 	"id": "khoj",
 	"name": "Khoj",
-	"version": "0.8.2",
+	"version": "0.9.0",
 	"minAppVersion": "0.15.0",
 	"description": "An AI Personal Assistant for your Digital Brain",
 	"author": "Debanjum Singh Solanky",
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,7 +46,7 @@ dependencies = [
    "tenacity >= 8.2.2",
    "pillow == 9.3.0",
    "pydantic >= 1.10.10",
-    "pyqt6 == 6.3.1",
+    "pyside6 >= 6.5.1",
    "pyyaml == 6.0",
    "rich >= 13.3.1",
    "schedule == 1.1.0",
--- a/src/interface/emacs/khoj.el
+++ b/src/interface/emacs/khoj.el
@@ -5,7 +5,7 @@
 ;; Author: Debanjum Singh Solanky <debanjum@gmail.com>
 ;; Description: An AI personal assistant for your digital brain
 ;; Keywords: search, chat, org-mode, outlines, markdown, pdf, image
-;; Version: 0.8.2
+;; Version: 0.9.0
 ;; Package-Requires: ((emacs "27.1") (transient "0.3.0") (dash "2.19.1"))
 ;; URL: https://github.com/khoj-ai/khoj/tree/master/src/interface/emacs

@@ -62,7 +62,7 @@
 ;; Khoj Static Configuration
 ;; -------------------------

-(defcustom khoj-server-url "http://localhost:8000"
+(defcustom khoj-server-url "http://localhost:42110"
  "Location of Khoj API server."
  :group 'khoj
  :type 'string)
@@ -221,6 +221,11 @@ for example), set this to the full interpreter path."
  :type '(repeat string)
  :group 'khoj)

+(defcustom khoj-chat-model nil
+  "Specify chat model to use for chat with khoj."
+  :type 'string
+  :group 'khoj)
+
 (defcustom khoj-openai-api-key nil
  "OpenAI API key used to configure chat on khoj server."
  :type 'string
@@ -368,7 +373,8 @@ CONFIG is json obtained from Khoj config API."
             (ignore-error json-end-of-file (json-parse-buffer :object-type 'alist :array-type 'list :null-object json-null :false-object json-false))))
         (default-index-dir (khoj--get-directory-from-config default-config '(content-type org embeddings-file)))
         (default-chat-dir (khoj--get-directory-from-config default-config '(processor conversation conversation-logfile)))
-         (default-model (or (alist-get 'model (alist-get 'conversation (alist-get 'processor default-config))) "text-davinci-003"))
+         (chat-model (or khoj-chat-model (alist-get 'chat-model (alist-get 'conversation (alist-get 'processor default-config)))))
+         (default-model (alist-get 'model (alist-get 'conversation (alist-get 'processor default-config))))
         (config (or current-config default-config)))

    ;; Configure content types
@@ -423,6 +429,7 @@ CONFIG is json obtained from Khoj config API."
      (message "khoj.el: Chat not configured yet.")
      (setq config (delq (assoc 'processor config) config))
      (cl-pushnew `(processor . ((conversation . ((conversation-logfile . ,(format "%s/conversation.json" default-chat-dir))
+                                                  (chat-model . ,chat-model)
                                                  (model . ,default-model)
                                                  (openai-api-key . ,khoj-openai-api-key)))))
                  config))
@@ -432,6 +439,7 @@ CONFIG is json obtained from Khoj config API."
       (let ((new-processor-type (alist-get 'processor config)))
         (setq new-processor-type (delq (assoc 'conversation new-processor-type) new-processor-type))
         (cl-pushnew `(conversation . ((conversation-logfile . ,(format "%s/conversation.json" default-chat-dir))
+                                       (chat-model . ,chat-model)
                                       (model . ,default-model)
                                       (openai-api-key . ,khoj-openai-api-key)))
                     new-processor-type)
@@ -439,14 +447,15 @@ CONFIG is json obtained from Khoj config API."
        (cl-pushnew `(processor . ,new-processor-type) config)))

     ;; Else if khoj is not configured with specified openai api key
-     ((not (equal (alist-get 'openai-api-key (alist-get 'conversation (alist-get 'processor config))) khoj-openai-api-key))
+     ((not (and (equal (alist-get 'openai-api-key (alist-get 'conversation (alist-get 'processor config))) khoj-openai-api-key)
+                (equal (alist-get 'chat-model (alist-get 'conversation (alist-get 'processor config))) khoj-chat-model)))
      (message "khoj.el: Chat configuration has gone stale.")
      (let* ((chat-directory (khoj--get-directory-from-config config '(processor conversation conversation-logfile)))
-             (model-name (khoj--get-directory-from-config config '(processor conversation model)))
             (new-processor-type (alist-get 'processor config)))
        (setq new-processor-type (delq (assoc 'conversation new-processor-type) new-processor-type))
        (cl-pushnew `(conversation . ((conversation-logfile . ,(format "%s/conversation.json" chat-directory))
-                                      (model . ,model-name)
+                                      (model . ,default-model)
+                                      (chat-model . ,khoj-chat-model)
                                      (openai-api-key . ,khoj-openai-api-key)))
                    new-processor-type)
        (setq config (delq (assoc 'processor config) config))
@@ -595,7 +604,7 @@ CONFIG is json obtained from Khoj config API."
        (file-extension (file-name-extension buffer-name)))
    (cond
     ((and (member 'org enabled-content-types) (equal file-extension "org")) "org")
-     ((and (member 'org enabled-content-types) (equal file-extension "pdf")) "pdf")
+     ((and (member 'pdf enabled-content-types) (equal file-extension "pdf")) "pdf")
     ((and (member 'markdown enabled-content-types) (or (equal file-extension "markdown") (equal file-extension "md"))) "markdown")
     (t khoj-default-content-type))))

@@ -609,13 +618,13 @@ CONFIG is json obtained from Khoj config API."
  ;; POST provided config to khoj server
  (let ((url-request-method "POST")
        (url-request-extra-headers '(("Content-Type" . "application/json")))
-        (url-request-data (json-encode-alist config))
+        (url-request-data (encode-coding-string (json-encode-alist config) 'utf-8))
        (config-url (format "%s/api/config/data" khoj-server-url)))
    (with-current-buffer (url-retrieve-synchronously config-url)
      (buffer-string)))
  ;; Update index on khoj server after configuration update
  (let ((khoj--server-ready? nil))
-    (url-retrieve (format "%s/api/update?t=org&client=emacs" khoj-server-url) #'identity)))
+    (url-retrieve (format "%s/api/update?client=emacs" khoj-server-url) #'identity)))

 (defun khoj--get-enabled-content-types ()
  "Get content types enabled for search from API."
@@ -1023,7 +1032,8 @@ Paragraph only starts at first text after blank line."
  (let* ((force-update (if (member "--force-update" args) "true" "false"))
         ;; set content type to: specified > last used > based on current buffer > default type
         (content-type (or (transient-arg-value "--content-type=" args) (khoj--buffer-name-to-content-type (buffer-name))))
-         (update-url (format "%s/api/update?t=%s&force=%s&client=emacs" khoj-server-url content-type force-update))
+         (type-query (if (equal content-type "all") "" (format "t=%s" content-type)))
+         (update-url (format "%s/api/update?%s&force=%s&client=emacs" khoj-server-url type-query force-update))
         (url-request-method "GET"))
    (progn
      (setq khoj--content-type content-type)
--- a/src/interface/obsidian/manifest.json
+++ b/src/interface/obsidian/manifest.json
@@ -1,7 +1,7 @@
 {
 	"id": "khoj",
 	"name": "Khoj",
-	"version": "0.8.2",
+	"version": "0.9.0",
 	"minAppVersion": "0.15.0",
 	"description": "An AI Personal Assistant for your Digital Brain",
 	"author": "Debanjum Singh Solanky",
--- a/src/interface/obsidian/package.json
+++ b/src/interface/obsidian/package.json
@@ -1,6 +1,6 @@
 {
    "name": "Khoj",
-    "version": "0.8.2",
+    "version": "0.9.0",
    "description": "An AI Personal Assistant for your Digital Brain",
    "main": "src/main.js",
    "scripts": {
--- a/src/interface/obsidian/src/search_modal.ts
+++ b/src/interface/obsidian/src/search_modal.ts
@@ -75,7 +75,7 @@ export class KhojSearchModal extends SuggestModal<SearchResult> {
                this.rerank = true
                // Set input element to contents of active markdown file
                // truncate to first 8,000 characters to avoid hitting query size limits
-                this.inputEl.value = await this.app.vault.read(file).then(file_str => file_str.slice(0, 8000));
+                this.inputEl.value = await this.app.vault.read(file).then(file_str => file_str.slice(0, 8000));
                // Trigger search to get and render similar notes from khoj backend
                this.inputEl.dispatchEvent(new Event('input'));
                this.rerank = false
--- a/src/interface/obsidian/src/settings.ts
+++ b/src/interface/obsidian/src/settings.ts
@@ -11,7 +11,7 @@ export interface KhojSetting {

 export const DEFAULT_SETTINGS: KhojSetting = {
    resultsCount: 6,
-    khojUrl: 'http://127.0.0.1:8000',
+    khojUrl: 'http://127.0.0.1:42110',
    connectedToBackend: false,
    autoConfigure: true,
    openaiApiKey: '',
--- a/src/interface/obsidian/versions.json
+++ b/src/interface/obsidian/versions.json
@@ -12,5 +12,6 @@
 	"0.7.1": "0.15.0",
 	"0.8.0": "0.15.0",
 	"0.8.1": "0.15.0",
-	"0.8.2": "0.15.0"
+	"0.8.2": "0.15.0",
+	"0.9.0": "0.15.0"
 }
--- a/src/khoj/configure.py
+++ b/src/khoj/configure.py
@@ -20,9 +20,15 @@ from khoj.processor.github.github_to_jsonl import GithubToJsonl
 from khoj.processor.notion.notion_to_jsonl import NotionToJsonl
 from khoj.search_type import image_search, text_search
 from khoj.utils import constants, state
-from khoj.utils.config import SearchType, SearchModels, ProcessorConfigModel, ConversationProcessorConfigModel
+from khoj.utils.config import (
+    ContentIndex,
+    SearchType,
+    SearchModels,
+    ProcessorConfigModel,
+    ConversationProcessorConfigModel,
+)
 from khoj.utils.helpers import LRU, resolve_absolute_path, merge_dicts
-from khoj.utils.rawconfig import FullConfig, ProcessorConfig
+from khoj.utils.rawconfig import FullConfig, ProcessorConfig, SearchConfig, ContentConfig
 from khoj.search_filter.date_filter import DateFilter
 from khoj.search_filter.word_filter import WordFilter
 from khoj.search_filter.file_filter import FileFilter
@@ -31,29 +37,61 @@ from khoj.search_filter.file_filter import FileFilter
 logger = logging.getLogger(__name__)


-def configure_server(args, required=False):
-    if args.config is None:
-        if required:
-            logger.error(
-                f"Exiting as Khoj is not configured.\nConfigure it via http://localhost:8000/config or by editing {state.config_file}."
-            )
-            sys.exit(1)
-        else:
-            logger.warning(
-                f"Khoj is not configured.\nConfigure it via http://localhost:8000/config, plugins or by editing {state.config_file}."
-            )
-            return
-    else:
-        state.config = args.config
+def initialize_server(config: Optional[FullConfig], regenerate: bool, required=False):
+    if config is None and required:
+        logger.error(
+            f"🚨 Exiting as Khoj is not configured.\nConfigure it via http://localhost:42110/config or by editing {state.config_file}."
+        )
+        sys.exit(1)
+    elif config is None:
+        logger.warning(
+            f"🚨 Khoj is not configured.\nConfigure it via http://localhost:42110/config, plugins or by editing {state.config_file}."
+        )
+        return None
+
+    try:
+        configure_server(config, regenerate)
+    except Exception as e:
+        logger.error(f"🚨 Failed to configure server on app load: {e}", exc_info=True)
+
+
+def configure_server(config: FullConfig, regenerate: bool, search_type: Optional[SearchType] = None):
+    # Update Config
+    state.config = config

    # Initialize Processor from Config
-    state.processor_config = configure_processor(args.config.processor)
+    state.config_lock.acquire()  # acquire before try: finally must only release a lock that is held
+    try:
+        state.processor_config = configure_processor(state.config.processor)
+    except Exception:
+        logger.error("🚨 Failed to configure processor")
+        raise  # re-raise unchanged so the caller decides how to handle it
+    finally:
+        state.config_lock.release()

-    # Initialize the search type and model from Config
-    state.search_index_lock.acquire()
-    state.SearchType = configure_search_types(state.config)
-    state.model = configure_search(state.model, state.config, args.regenerate)
-    state.search_index_lock.release()
+    # Initialize Search Models from Config
+    state.config_lock.acquire()
+    try:
+        state.SearchType = configure_search_types(state.config)
+        state.search_models = configure_search(state.search_models, state.config.search_type)
+    except Exception:
+        logger.error("🚨 Failed to configure search models")
+        raise
+    finally:
+        state.config_lock.release()
+
+    # Initialize Content from Config
+    if state.search_models:
+        state.config_lock.acquire()
+        try:
+            state.content_index = configure_content(
+                state.content_index, state.config.content_type, state.search_models, regenerate, search_type
+            )
+        except Exception:
+            logger.error("🚨 Failed to index content")
+            raise
+        finally:
+            state.config_lock.release()


 def configure_routes(app):
@@ -72,10 +110,16 @@ if not state.demo:

    @schedule.repeat(schedule.every(61).minutes)
    def update_search_index():
-        state.search_index_lock.acquire()
-        state.model = configure_search(state.model, state.config, regenerate=False)
-        state.search_index_lock.release()
-        logger.info("📬 Search index updated via Scheduler")
+        """Scheduled job: refresh the content index with regenerate=False.
+
+        Failures are logged and swallowed so the job does not raise into the scheduler.
+        """
+        # Acquire before entering `try` so `finally` only releases a lock that was actually taken
+        state.config_lock.acquire()
+        try:
+            state.content_index = configure_content(
+                state.content_index, state.config.content_type, state.search_models, regenerate=False
+            )
+            logger.info("📬 Content index updated via Scheduler")
+        except Exception as e:
+            logger.error(f"🚨 Error updating content index via Scheduler: {e}")
+        finally:
+            state.config_lock.release()


 def configure_search_types(config: FullConfig):
@@ -90,111 +134,134 @@ def configure_search_types(config: FullConfig):
    return Enum("SearchType", merge_dicts(core_search_types, plugin_search_types))


-def configure_search(model: SearchModels, config: FullConfig, regenerate: bool, t: Optional[state.SearchType] = None):
-    if config is None or config.content_type is None or config.search_type is None:
-        logger.warning("🚨 No Content or Search type is configured.")
-        return
+def configure_search(search_models: SearchModels, search_config: Optional[SearchConfig]) -> Optional[SearchModels]:
+    """Initialize the text and image search models named in `search_config`.
+
+    Returns None (after logging a warning) when `search_config` is None.
+    When `search_models` is None a fresh `SearchModels` is created; otherwise
+    the passed instance is updated in place and returned.
+    """
+    # Run Validation Checks
+    if search_config is None:
+        logger.warning("🚨 No Search configuration available.")
+        return None
+    if search_models is None:
+        search_models = SearchModels()

-    if model is None:
-        model = SearchModels()
+    # Initialize Search Models
+    # Only the model kinds present in the search config are (re)initialized
+    if search_config.asymmetric:
+        logger.info("🔍 📜 Setting up text search model")
+        search_models.text_search = text_search.initialize_model(search_config.asymmetric)
+
+    if search_config.image:
+        logger.info("🔍 🌄 Setting up image search model")
+        search_models.image_search = image_search.initialize_model(search_config.image)
+
+    return search_models
+
+
+def configure_content(
+    content_index: Optional[ContentIndex],
+    content_config: Optional[ContentConfig],
+    search_models: SearchModels,
+    regenerate: bool,
+    t: Optional[state.SearchType] = None,
+) -> Optional[ContentIndex]:
+    """Set up the content index for each configured content type.
+
+    Returns None (after logging a warning) when `content_config` is None.
+    When `content_index` is None a fresh `ContentIndex` is created; otherwise
+    the passed instance is updated in place and returned.
+
+    A content type is set up only when it is present in `content_config` and the
+    search model it needs (text or image) is available in `search_models`.
+    `t` limits setup to the matching search type; None sets up every type.
+    `regenerate` is forwarded to each `setup` call.
+
+    Any failure is logged and re-raised. On success the query cache in
+    `state.query_cache` is replaced with an empty one.
+    """
+    # Run Validation Checks
+    if content_config is None:
+        logger.warning("🚨 No Content configuration available.")
+        return None
+    if content_index is None:
+        content_index = ContentIndex()

    try:
        # Initialize Org Notes Search
-        if (t == state.SearchType.Org or t == None) and config.content_type.org and config.search_type.asymmetric:
+        if (t == state.SearchType.Org or t == None) and content_config.org and search_models.text_search:
            logger.info("🦄 Setting up search for orgmode notes")
            # Extract Entries, Generate Notes Embeddings
-            model.org_search = text_search.setup(
+            content_index.org = text_search.setup(
                OrgToJsonl,
-                config.content_type.org,
-                search_config=config.search_type.asymmetric,
+                content_config.org,
+                search_models.text_search.bi_encoder,
                regenerate=regenerate,
                filters=[DateFilter(), WordFilter(), FileFilter()],
            )

        # Initialize Markdown Search
-        if (
-            (t == state.SearchType.Markdown or t == None)
-            and config.content_type.markdown
-            and config.search_type.asymmetric
-        ):
+        if (t == state.SearchType.Markdown or t == None) and content_config.markdown and search_models.text_search:
            logger.info("💎 Setting up search for markdown notes")
            # Extract Entries, Generate Markdown Embeddings
-            model.markdown_search = text_search.setup(
+            content_index.markdown = text_search.setup(
                MarkdownToJsonl,
-                config.content_type.markdown,
-                search_config=config.search_type.asymmetric,
+                content_config.markdown,
+                search_models.text_search.bi_encoder,
                regenerate=regenerate,
                filters=[DateFilter(), WordFilter(), FileFilter()],
            )

        # Initialize PDF Search
-        if (t == state.SearchType.Pdf or t == None) and config.content_type.pdf and config.search_type.asymmetric:
+        if (t == state.SearchType.Pdf or t == None) and content_config.pdf and search_models.text_search:
            logger.info("🖨️ Setting up search for pdf")
            # Extract Entries, Generate PDF Embeddings
-            model.pdf_search = text_search.setup(
+            content_index.pdf = text_search.setup(
                PdfToJsonl,
-                config.content_type.pdf,
-                search_config=config.search_type.asymmetric,
+                content_config.pdf,
+                search_models.text_search.bi_encoder,
                regenerate=regenerate,
                filters=[DateFilter(), WordFilter(), FileFilter()],
            )

        # Initialize Image Search
-        if (t == state.SearchType.Image or t == None) and config.content_type.image and config.search_type.image:
+        if (t == state.SearchType.Image or t == None) and content_config.image and search_models.image_search:
            logger.info("🌄 Setting up search for images")
            # Extract Entries, Generate Image Embeddings
-            model.image_search = image_search.setup(
-                config.content_type.image, search_config=config.search_type.image, regenerate=regenerate
+            content_index.image = image_search.setup(
+                content_config.image, search_models.image_search.image_encoder, regenerate=regenerate
            )

-        if (t == state.SearchType.Github or t == None) and config.content_type.github and config.search_type.asymmetric:
+        if (t == state.SearchType.Github or t == None) and content_config.github and search_models.text_search:
            logger.info("🐙 Setting up search for github")
            # Extract Entries, Generate Github Embeddings
-            model.github_search = text_search.setup(
+            content_index.github = text_search.setup(
                GithubToJsonl,
-                config.content_type.github,
-                search_config=config.search_type.asymmetric,
+                content_config.github,
+                search_models.text_search.bi_encoder,
                regenerate=regenerate,
                filters=[DateFilter(), WordFilter(), FileFilter()],
            )

        # Initialize External Plugin Search
-        if (t == None or t in state.SearchType) and config.content_type.plugins:
+        # NOTE(review): `t in state.SearchType` looks true for any search type, so a request
+        # scoped to a single type would also re-run plugin setup — confirm this is intended
+        if (t == None or t in state.SearchType) and content_config.plugins and search_models.text_search:
            logger.info("🔌 Setting up search for plugins")
-            model.plugin_search = {}
-            for plugin_type, plugin_config in config.content_type.plugins.items():
-                model.plugin_search[plugin_type] = text_search.setup(
+            content_index.plugins = {}
+            for plugin_type, plugin_config in content_config.plugins.items():
+                content_index.plugins[plugin_type] = text_search.setup(
                    JsonlToJsonl,
                    plugin_config,
-                    search_config=config.search_type.asymmetric,
+                    search_models.text_search.bi_encoder,
                    regenerate=regenerate,
                    filters=[DateFilter(), WordFilter(), FileFilter()],
                )

        # Initialize Notion Search
-        if (t == None or t in state.SearchType) and config.content_type.notion:
+        # NOTE(review): same broad `t in state.SearchType` check as the plugin branch — confirm
+        if (t == None or t in state.SearchType) and content_config.notion and search_models.text_search:
            logger.info("🔌 Setting up search for notion")
-            model.notion_search = text_search.setup(
+            content_index.notion = text_search.setup(
                NotionToJsonl,
-                config.content_type.notion,
-                search_config=config.search_type.asymmetric,
+                content_config.notion,
+                search_models.text_search.bi_encoder,
                regenerate=regenerate,
                filters=[DateFilter(), WordFilter(), FileFilter()],
            )

    except Exception as e:
-        logger.error("🚨 Failed to setup search")
+        logger.error(f"🚨 Failed to setup search: {e}", exc_info=True)
        raise e

    # Invalidate Query Cache
    state.query_cache = LRU()

-    return model
+    return content_index


-def configure_processor(processor_config: ProcessorConfig):
+def configure_processor(processor_config: Optional[ProcessorConfig]):
    if not processor_config:
-        return
+        logger.warning("🚨 No Processor configuration available.")
+        return None

    processor = ProcessorConfigModel()

--- a/src/khoj/interface/desktop/main_window.py
+++ b/src/khoj/interface/desktop/main_window.py
@@ -2,11 +2,24 @@
 import webbrowser

 # External Packages
-from PyQt6 import QtGui, QtWidgets
-from PyQt6.QtCore import Qt
+from PySide6 import QtGui, QtWidgets
+from PySide6.QtCore import Qt

 # Internal Packages
 from khoj.utils import constants
+from PySide6.QtCore import QThread
+
+
+class ServerThread(QThread):
+    """Qt thread that runs the server start callable it was given."""
+
+    def __init__(self, start_server_func):
+        # Zero-argument super() resolves to the same QThread initializer
+        super().__init__()
+        self.start_server_func = start_server_func
+
+    def __del__(self):
+        # Wait on the thread when this wrapper object is being destroyed
+        self.wait()
+
+    def run(self):
+        # Thread body: invoke the stored callable with no arguments
+        self.start_server_func()


 class MainWindow(QtWidgets.QMainWindow):
--- a/src/khoj/interface/desktop/system_tray.py
+++ b/src/khoj/interface/desktop/system_tray.py
@@ -2,7 +2,7 @@
 import webbrowser

 # External Packages
-from PyQt6 import QtGui, QtWidgets
+from PySide6 import QtGui, QtWidgets

 # Internal Packages
 from khoj.utils import constants, state
--- a/src/khoj/interface/web/assets/icons/question-mark-icon.svg
+++ b/src/khoj/interface/web/assets/icons/question-mark-icon.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" shape-rendering="geometricPrecision" text-rendering="geometricPrecision" image-rendering="optimizeQuality" fill-rule="evenodd" clip-rule="evenodd" viewBox="0 0 512 512"><path fill-rule="nonzero" d="M256 0c70.69 0 134.7 28.66 181.02 74.98C483.34 121.31 512 185.31 512 256c0 70.69-28.66 134.7-74.98 181.02C390.7 483.34 326.69 512 256 512c-70.69 0-134.69-28.66-181.02-74.98C28.66 390.7 0 326.69 0 256c0-70.69 28.66-134.69 74.98-181.02C121.31 28.66 185.31 0 256 0zm-21.49 301.51v-2.03c.16-13.46 1.48-24.12 4.07-32.05 2.54-7.92 6.19-14.37 10.97-19.25 4.77-4.92 10.51-9.39 17.22-13.46 4.31-2.74 8.22-5.78 11.68-9.18 3.45-3.36 6.19-7.27 8.23-11.69 2.02-4.37 3.04-9.24 3.04-14.62 0-6.4-1.52-11.94-4.57-16.66-3-4.68-7.06-8.28-12.04-10.87-5.03-2.54-10.61-3.81-16.76-3.81-5.53 0-10.81 1.11-15.89 3.45-5.03 2.29-9.25 5.89-12.55 10.77-3.3 4.87-5.23 11.12-5.74 18.74h-32.91c.51-12.95 3.81-23.92 9.85-32.91 6.1-8.99 14.13-15.8 24.08-20.42 10.01-4.62 21.08-6.9 33.16-6.9 13.31 0 24.89 2.43 34.84 7.41 9.96 4.93 17.73 11.83 23.27 20.67 5.48 8.84 8.28 19.1 8.28 30.88 0 8.08-1.27 15.34-3.81 21.79-2.54 6.45-6.1 12.24-10.77 17.27-4.68 5.08-10.21 9.54-16.71 13.41-6.15 3.86-11.12 7.82-14.88 11.93-3.81 4.11-6.56 8.99-8.28 14.58-1.73 5.63-2.69 12.59-2.84 20.92v2.03h-30.94zm16.36 65.82c-5.94-.04-11.02-2.13-15.29-6.35-4.26-4.21-6.35-9.34-6.35-15.33 0-5.89 2.09-10.97 6.35-15.19 4.27-4.21 9.35-6.35 15.29-6.35 5.84 0 10.92 2.14 15.18 6.35 4.32 4.22 6.45 9.3 6.45 15.19 0 3.96-1.01 7.62-2.99 10.87-1.98 3.3-4.57 5.94-7.82 7.87-3.25 1.93-6.86 2.9-10.82 2.94zM417.71 94.29C376.33 52.92 319.15 27.32 256 27.32c-63.15 0-120.32 25.6-161.71 66.97C52.92 135.68 27.32 192.85 27.32 256c0 63.15 25.6 120.33 66.97 161.71 41.39 41.37 98.56 66.97 161.71 66.97 63.15 0 120.33-25.6 161.71-66.97 41.37-41.38 66.97-98.56 66.97-161.71 0-63.15-25.6-120.32-66.97-161.71z"/></svg>
--- a/src/khoj/interface/web/assets/khoj.css
+++ b/src/khoj/interface/web/assets/khoj.css
@@ -85,6 +85,21 @@ img.khoj-logo {
    justify-self: center;
 }

+a.khoj-banner {
+    color: black;
+    text-decoration: none;
+}
+
+p.khoj-banner {
+    font-size: medium;
+    margin: 0;
+    padding: 10px;
+}
+
+p#khoj-banner {
+    display: inline;
+}
+
@media only screen and (max-width: 600px) {
    div.khoj-header {
        display: grid;
--- a/src/khoj/interface/web/base_config.html
+++ b/src/khoj/interface/web/base_config.html
@@ -51,6 +51,10 @@
            body.khoj-configure {
                padding: 0;
            }
+
+            div.section {
+                padding: 12px;
+            }
        }

        img.khoj-logo {
@@ -69,6 +73,11 @@
            display: grid;
            justify-self: center;
        }
+
+        div.instructions {
+            font-size: large;
+        }
+
        .section-title {
            margin: 0;
            padding: 0 0 16px 0;
@@ -162,6 +171,11 @@
            max-width: 16px;
        }

+        div.finalize-actions {
+            grid-auto-flow: column;
+            grid-gap: 24px;
+        }
+
        @media screen and (max-width: 600px) {
            .section-cards {
                grid-template-columns: 1fr;
--- a/src/khoj/interface/web/chat.html
+++ b/src/khoj/interface/web/chat.html
@@ -166,20 +166,20 @@
        }
    </script>
    <body>
+        <div id="khoj-banner-container" class="khoj-banner-container">
+        {% if demo %}
+            <!-- Banner linking to https://khoj.dev -->
+                <a class="khoj-banner" href="https://khoj.dev" target="_blank">
+                    <p id="khoj-banner" class="khoj-banner">
+                        Enroll in Khoj cloud to get your own assistant
+                    </p>
+                </a>
+                <input type="text" id="khoj-banner-email" placeholder="email" class="khoj-banner-email"></input>
+                <button id="khoj-banner-submit" class="khoj-banner-button">Submit</button>
+        {% endif %}
+        </div>
        <!--Add Header Logo and Nav Pane-->
        <div class="khoj-header">
-            {% if demo %}
-                <!-- Banner linking to https://khoj.dev -->
-                <div class="khoj-banner-container">
-                    <a class="khoj-banner" href="https://khoj.dev" target="_blank">
-                        <p id="khoj-banner" class="khoj-banner">
-                            Enroll in Khoj cloud to get your own Github assistant
-                        </p>
-                    </a>
-                    <input type="text" id="khoj-banner-email" placeholder="email" class="khoj-banner-email"></input>
-                    <button id="khoj-banner-submit" class="khoj-banner-button">Submit</button>
-                </div>
-            {% endif %}
            {% if demo %}
                <a class="khoj-logo" href="https://khoj.dev" target="_blank">
                    <img class="khoj-logo" src="/static/assets/icons/khoj-logo-sideways.svg" alt="Khoj"></img>
@@ -351,7 +351,7 @@
        @media only screen and (max-width: 600px) {
            body {
                grid-template-columns: 1fr;
-                grid-template-rows: auto minmax(80px, 100%) auto;
+                grid-template-rows: auto auto minmax(80px, 100%) auto;
            }
            body > * {
                grid-column: 1;
@@ -364,11 +364,14 @@
            a.khoj-banner {
                display: block;
            }
+            p.khoj-banner {
+                padding: 0;
+            }
        }
        @media only screen and (min-width: 600px) {
            body {
                grid-template-columns: auto min(70vw, 100%) auto;
-                grid-template-rows: auto minmax(80px, 100%) auto;
+                grid-template-rows: auto auto minmax(80px, 100%) auto;
            }
            body > * {
                grid-column: 2;
@@ -395,19 +398,10 @@
            }
        }

-        a.khoj-banner {
-            color: black;
-        }
-
        a.khoj-logo {
            text-align: center;
        }

-        p.khoj-banner {
-            margin: 0;
-            padding: 10px;
-        }
-
        button#khoj-banner-submit,
        input#khoj-banner-email {
            padding: 10px;
@@ -420,17 +414,17 @@
        input#khoj-banner-email:hover {
            box-shadow: 0 0 11px #aaa;
        }
-
-        p#khoj-banner {
-            display: inline;
-        }
-
-        a.khoj-banner {
-            color: black;
-            text-decoration: none;
+        div.khoj-banner-container-hidden {
+            margin: 0px;
+            padding: 0px;
        }
    </style>
    <script>
+        if ("{{demo}}" === "False") {
+            document.getElementById("khoj-banner-container").classList.remove("khoj-banner-container");
+            document.getElementById("khoj-banner-container").classList.add("khoj-banner-container-hidden");
+        }
+
        var khojBannerSubmit = document.getElementById("khoj-banner-submit");

        khojBannerSubmit?.addEventListener("click", function(event) {
--- a/src/khoj/interface/web/config.html
+++ b/src/khoj/interface/web/config.html
@@ -11,7 +11,11 @@
                    <h3 class="card-title">
                        Github
                        {% if current_config.content_type.github %}
-                            <img id="configured-icon-github" class="configured-icon" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
+                            {% if current_model_state.github == False %}
+                                <img id="misconfigured-icon-github" class="configured-icon" src="/static/assets/icons/question-mark-icon.svg" alt="Not Configured" title="Embeddings have not been generated yet for this content type. Either the configuration is invalid, or you just need to click Configure.">
+                            {% else %}
+                                <img id="configured-icon-github" class="configured-icon" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
+                            {% endif %}
                        {% endif %}
                    </h3>
                </div>
@@ -42,7 +46,11 @@
                    <h3 class="card-title">
                        Notion
                        {% if current_config.content_type.notion %}
-                            <img id="configured-icon-notion" class="configured-icon" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
+                            {% if current_model_state.notion == False %}
+                                <img id="misconfigured-icon-notion" class="configured-icon" src="/static/assets/icons/question-mark-icon.svg" alt="Not Configured" title="Embeddings have not been generated yet for this content type. Either the configuration is invalid, or you just need to click Configure.">
+                            {% else %}
+                                <img id="configured-icon-notion" class="configured-icon" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
+                            {% endif %}
                        {% endif %}
                    </h3>
                </div>
@@ -73,7 +81,11 @@
                    <h3 class="card-title">
                        Markdown
                        {% if current_config.content_type.markdown %}
-                            <img id="configured-icon-markdown" class="configured-icon" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
+                            {% if current_model_state.markdown == False%}
+                                <img id="misconfigured-icon-markdown" class="configured-icon" src="/static/assets/icons/question-mark-icon.svg" alt="Not Configured" title="Embeddings have not been generated yet for this content type. Either the configuration is invalid, or you just need to click Configure.">
+                            {% else %}
+                                <img id="configured-icon-markdown" class="configured-icon" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
+                            {% endif %}
                        {% endif %}
                    </h3>
                </div>
@@ -104,7 +116,11 @@
                    <h3 class="card-title">
                        Org
                        {% if current_config.content_type.org %}
-                            <img id="configured-icon-org" class="configured-icon" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
+                            {% if current_model_state.org == False %}
+                                <img id="misconfigured-icon-org" class="configured-icon" src="/static/assets/icons/question-mark-icon.svg" alt="Not Configured" title="Embeddings have not been generated yet for this content type. Either the configuration is invalid, or you just need to click Configure.">
+                            {% else %}
+                                <img id="configured-icon-org" class="configured-icon" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
+                            {% endif %}
                        {% endif %}
                    </h3>
                </div>
@@ -135,7 +151,11 @@
                    <h3 class="card-title">
                        PDF
                        {% if current_config.content_type.pdf %}
-                            <img id="configured-icon-pdf" class="configured-icon" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
+                            {% if current_model_state.pdf == False %}
+                                <img id="misconfigured-icon-pdf" class="configured-icon" src="/static/assets/icons/question-mark-icon.svg" alt="Not Configured" title="Embeddings have not been generated yet for this content type. Either the configuration is invalid, or you just need to click Configure.">
+                            {% else %}
+                                <img id="configured-icon-pdf" class="configured-icon" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
+                            {% endif %}
                        {% endif %}
                    </h3>
                </div>
@@ -171,8 +191,12 @@
                    <h3 class="card-title">
                        Chat
                        {% if current_config.processor and current_config.processor.conversation %}
-                            <img id="configured-icon-conversation-processor" class="configured-icon" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
-                         {% endif %}
+                            {% if current_model_state.conversation == False %}
+                                <img id="misconfigured-icon-conversation-processor" class="configured-icon" src="/static/assets/icons/question-mark-icon.svg" alt="Not Configured" title="Embeddings have not been generated yet for this content type. Either the configuration is invalid, or you just need to click Configure.">
+                            {% else %}
+                                <img id="configured-icon-conversation-processor" class="configured-icon" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
+                            {% endif %}
+                        {% endif %}
                    </h3>
                </div>
                <div class="card-description-row">
@@ -204,6 +228,8 @@
            <input type="range" id="results-count-slider" name="results-count-slider" min="1" max="10" step="1" value="5">
        </div>
        <div id="status" style="display: none;"></div>
+    </div>
+    <div class="section finalize-actions">
        <button id="configure" type="submit" title="Update index with the latest changes">⚙️ Configure</button>
        <button id="reinitialize" type="submit" title="Regenerate index from scratch">🔄 Reinitialize</button>
    </div>
@@ -225,7 +251,14 @@
                contentTypeClearButton.style.display = "none";

                var configuredIcon = document.getElementById("configured-icon-" + content_type);
-                configuredIcon.style.display = "none";
+                if (configuredIcon) {
+                    configuredIcon.style.display = "none";
+                }
+
+                var misconfiguredIcon = document.getElementById("misconfigured-icon-" + content_type);
+                if (misconfiguredIcon) {
+                    misconfiguredIcon.style.display = "none";
+                }
            }
        })
    };
@@ -246,7 +279,15 @@
                conversationClearButton.style.display = "none";

                var configuredIcon = document.getElementById("configured-icon-conversation-processor");
-                configuredIcon.style.display = "none";
+                if (configuredIcon) {
+                    configuredIcon.style.display = "none";
+                }
+
+                var misconfiguredIcon = document.getElementById("misconfigured-icon-conversation-processor");
+
+                if (misconfiguredIcon) {
+                    misconfiguredIcon.style.display = "none";
+                }
            }
        })
    };
@@ -292,14 +333,14 @@
            if (data.detail != null) {
                throw new Error(data.detail);
            }
-            document.getElementById("status").innerHTML = emoji + successText;
+            document.getElementById("status").innerHTML = emoji + " " + successText;
            document.getElementById("status").style.display = "block";
            button.disabled = false;
            button.innerHTML = '✅ Done!';
        })
        .catch((error) => {
            console.error('Error:', error);
-            document.getElementById("status").innerHTML = emoji + errorText
+            document.getElementById("status").innerHTML = emoji + " " + errorText
            document.getElementById("status").style.display = "block";
            button.disabled = false;
            button.innerHTML = '⚠️ Unsuccessful';
--- a/src/khoj/interface/web/content_type_github_input.html
+++ b/src/khoj/interface/web/content_type_github_input.html
@@ -5,6 +5,9 @@
        <h2 class="section-title">
            <img class="card-icon" src="/static/assets/icons/github.svg" alt="Github">
            <span class="card-title-text">Github</span>
+            <div class="instructions">
+                <a href="https://github.com/khoj-ai/khoj/wiki/Setup-Github-integration">ⓘ Help</a>
+            </div>
        </h2>
        <form>
            <table>
--- a/src/khoj/interface/web/content_type_notion_input.html
+++ b/src/khoj/interface/web/content_type_notion_input.html
@@ -5,6 +5,9 @@
        <h2 class="section-title">
            <img class="card-icon" src="/static/assets/icons/notion.svg" alt="Notion">
            <span class="card-title-text">Notion</span>
+            <div class="instructions">
+                <a href="https://github.com/khoj-ai/khoj/wiki/Setup-Notion-Integration">ⓘ Help</a>
+            </div>
        </h2>
        <form>
            <table>
--- a/src/khoj/interface/web/index.html
+++ b/src/khoj/interface/web/index.html
@@ -216,19 +216,21 @@
    </script>

    <body>
+        {% if demo %}
+            <!-- Banner linking to https://khoj.dev -->
+            <div class="khoj-banner-container">
+                <a class="khoj-banner" href="https://khoj.dev" target="_blank">
+                    <p id="khoj-banner" class="khoj-banner">
+                        Enroll in Khoj cloud to get your own assistant
+                    </p>
+                </a>
+                <input type="text" id="khoj-banner-email" placeholder="email" class="khoj-banner-email"></input>
+                <button id="khoj-banner-submit" class="khoj-banner-button">Submit</button>
+            </div>
+        {% endif %}
        <!--Add Header Logo and Nav Pane-->
        <div class="khoj-header">
            {% if demo %}
-                <!-- Banner linking to https://khoj.dev -->
-                <div class="khoj-banner-container">
-                    <a class="khoj-banner" href="https://khoj.dev" target="_blank">
-                        <p id="khoj-banner" class="khoj-banner">
-                            Enroll in Khoj cloud to get your own Github assistant
-                        </p>
-                    </a>
-                    <input type="text" id="khoj-banner-email" placeholder="email" class="khoj-banner-email"></input>
-                    <button id="khoj-banner-submit" class="khoj-banner-button">Submit</button>
-                </div>
                <a class="khoj-logo" href="https://khoj.dev" target="_blank">
                    <img class="khoj-logo" src="/static/assets/icons/khoj-logo-sideways.svg" alt="Khoj"></img>
                </a>
@@ -447,19 +449,10 @@
            }
        }

-        a.khoj-banner {
-            color: black;
-        }
-
        a.khoj-logo {
            text-align: center;
        }

-        p.khoj-banner {
-            margin: 0;
-            padding: 10px;
-        }
-
        button#khoj-banner-submit,
        input#khoj-banner-email {
            padding: 10px;
@@ -473,14 +466,13 @@
            box-shadow: 0 0 11px #aaa;
        }

-        p#khoj-banner {
-            display: inline;
-        }
-
        @media only screen and (max-width: 600px) {
            a.khoj-banner {
                display: block;
            }
+            p.khoj-banner {
+                padding: 0;
+            }
        }

    </style>
--- a/src/khoj/main.py
+++ b/src/khoj/main.py
@@ -2,6 +2,12 @@
 import os
 import signal
 import sys
+
+if sys.stdout is None:
+    sys.stdout = open(os.devnull, "w")
+if sys.stderr is None:
+    sys.stderr = open(os.devnull, "w")
+
 import logging
 import threading
 import warnings
@@ -15,18 +21,13 @@ warnings.filterwarnings("ignore", message=r"legacy way to download files from th
 # External Packages
 import uvicorn
 from fastapi import FastAPI
-from PyQt6 import QtWidgets
-from PyQt6.QtCore import QThread, QTimer
 from rich.logging import RichHandler
 import schedule

 # Internal Packages
-from khoj.configure import configure_routes, configure_server
+from khoj.configure import configure_routes, initialize_server
 from khoj.utils import state
 from khoj.utils.cli import cli
-from khoj.interface.desktop.main_window import MainWindow
-from khoj.interface.desktop.system_tray import create_system_tray
-

 # Initialize the Application Server
 app = FastAPI()
@@ -69,10 +70,15 @@ def run():
        poll_task_scheduler()

        # Start Server
-        configure_server(args, required=False)
+        initialize_server(args.config, args.regenerate, required=False)
        configure_routes(app)
        start_server(app, host=args.host, port=args.port, socket=args.socket)
    else:
+        from PySide6 import QtWidgets
+        from PySide6.QtCore import QThread, QTimer
+        from khoj.interface.desktop.main_window import MainWindow, ServerThread
+        from khoj.interface.desktop.system_tray import create_system_tray
+
        # Setup GUI
        gui = QtWidgets.QApplication([])
        main_window = MainWindow(args.host, args.port)
@@ -87,9 +93,9 @@ def run():
            tray.show()

        # Setup Server
-        configure_server(args, required=False)
+        initialize_server(args.config, args.regenerate, required=False)
        configure_routes(app)
-        server = ServerThread(app, args.host, args.port, args.socket)
+        # Forward the socket as well, as the replaced ServerThread(app, host, port, socket) call did
+        server = ServerThread(
+            start_server_func=lambda: start_server(app, host=args.host, port=args.port, socket=args.socket)
+        )

        url = f"http://{args.host}:{args.port}"
        logger.info(f"🌗 Khoj is running at {url}")
@@ -130,6 +136,8 @@ def run():


 def sigint_handler(*args):
+    from PySide6 import QtWidgets
+
    QtWidgets.QApplication.quit()


@@ -158,21 +166,6 @@ def poll_task_scheduler():
    schedule.run_pending()


-class ServerThread(QThread):
-    def __init__(self, app, host=None, port=None, socket=None):
-        super(ServerThread, self).__init__()
-        self.app = app
-        self.host = host
-        self.port = port
-        self.socket = socket
-
-    def __del__(self):
-        self.wait()
-
-    def run(self):
-        start_server(self.app, self.host, self.port, self.socket)
-
-
 def run_gui():
    sys.argv += ["--gui"]
    run()
--- a/src/khoj/processor/github/github_to_jsonl.py
+++ b/src/khoj/processor/github/github_to_jsonl.py
@@ -13,9 +13,8 @@ from khoj.utils.rawconfig import Entry, GithubContentConfig, GithubRepoConfig
 from khoj.processor.markdown.markdown_to_jsonl import MarkdownToJsonl
 from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl
 from khoj.processor.text_to_jsonl import TextToJsonl
-from khoj.utils.jsonl import dump_jsonl, compress_jsonl_data
+from khoj.utils.jsonl import compress_jsonl_data
 from khoj.utils.rawconfig import Entry
-from khoj.utils import state


 logger = logging.getLogger(__name__)
@@ -38,7 +37,7 @@ class GithubToJsonl(TextToJsonl):
        else:
            return

-    def process(self, previous_entries=None):
+    def process(self, previous_entries=[]):
        current_entries = []
        for repo in self.config.repos:
            current_entries += self.process_repo(repo)
@@ -98,10 +97,7 @@ class GithubToJsonl(TextToJsonl):
            jsonl_data = MarkdownToJsonl.convert_markdown_maps_to_jsonl(entries)

            # Compress JSONL formatted Data
-            if self.config.compressed_jsonl.suffix == ".gz":
-                compress_jsonl_data(jsonl_data, self.config.compressed_jsonl)
-            elif self.config.compressed_jsonl.suffix == ".jsonl":
-                dump_jsonl(jsonl_data, self.config.compressed_jsonl)
+            compress_jsonl_data(jsonl_data, self.config.compressed_jsonl)

        return entries_with_ids

--- a/src/khoj/processor/jsonl/jsonl_to_jsonl.py
+++ b/src/khoj/processor/jsonl/jsonl_to_jsonl.py
@@ -7,7 +7,7 @@ from typing import List
 # Internal Packages
 from khoj.processor.text_to_jsonl import TextToJsonl
 from khoj.utils.helpers import get_absolute_path, timer
-from khoj.utils.jsonl import load_jsonl, dump_jsonl, compress_jsonl_data
+from khoj.utils.jsonl import load_jsonl, compress_jsonl_data
 from khoj.utils.rawconfig import Entry


@@ -16,7 +16,7 @@ logger = logging.getLogger(__name__)

 class JsonlToJsonl(TextToJsonl):
    # Define Functions
-    def process(self, previous_entries=None):
+    def process(self, previous_entries=[]):
        # Extract required fields from config
        input_jsonl_files, input_jsonl_filter, output_file = (
            self.config.input_files,
@@ -38,15 +38,9 @@ class JsonlToJsonl(TextToJsonl):

        # Identify, mark and merge any new entries with previous entries
        with timer("Identify new or updated entries", logger):
-            if not previous_entries:
-                entries_with_ids = list(enumerate(current_entries))
-            else:
-                entries_with_ids = TextToJsonl.mark_entries_for_update(
-                    current_entries,
-                    previous_entries,
-                    key="compiled",
-                    logger=logger,
-                )
+            entries_with_ids = TextToJsonl.mark_entries_for_update(
+                current_entries, previous_entries, key="compiled", logger=logger
+            )

        with timer("Write entries to JSONL file", logger):
            # Process Each Entry from All Notes Files
@@ -54,10 +48,7 @@ class JsonlToJsonl(TextToJsonl):
            jsonl_data = JsonlToJsonl.convert_entries_to_jsonl(entries)

            # Compress JSONL formatted Data
-            if output_file.suffix == ".gz":
-                compress_jsonl_data(jsonl_data, output_file)
-            elif output_file.suffix == ".jsonl":
-                dump_jsonl(jsonl_data, output_file)
+            compress_jsonl_data(jsonl_data, output_file)

        return entries_with_ids

--- a/src/khoj/processor/markdown/markdown_to_jsonl.py
+++ b/src/khoj/processor/markdown/markdown_to_jsonl.py
@@ -10,7 +10,7 @@ from typing import List
 from khoj.processor.text_to_jsonl import TextToJsonl
 from khoj.utils.helpers import get_absolute_path, is_none_or_empty, timer
 from khoj.utils.constants import empty_escape_sequences
-from khoj.utils.jsonl import dump_jsonl, compress_jsonl_data
+from khoj.utils.jsonl import compress_jsonl_data
 from khoj.utils.rawconfig import Entry, TextContentConfig


@@ -23,7 +23,7 @@ class MarkdownToJsonl(TextToJsonl):
        self.config = config

    # Define Functions
-    def process(self, previous_entries=None):
+    def process(self, previous_entries=[]):
        # Extract required fields from config
        markdown_files, markdown_file_filter, output_file = (
            self.config.input_files,
@@ -51,12 +51,9 @@ class MarkdownToJsonl(TextToJsonl):

        # Identify, mark and merge any new entries with previous entries
        with timer("Identify new or updated entries", logger):
-            if not previous_entries:
-                entries_with_ids = list(enumerate(current_entries))
-            else:
-                entries_with_ids = TextToJsonl.mark_entries_for_update(
-                    current_entries, previous_entries, key="compiled", logger=logger
-                )
+            entries_with_ids = TextToJsonl.mark_entries_for_update(
+                current_entries, previous_entries, key="compiled", logger=logger
+            )

        with timer("Write markdown entries to JSONL file", logger):
            # Process Each Entry from All Notes Files
@@ -64,10 +61,7 @@ class MarkdownToJsonl(TextToJsonl):
            jsonl_data = MarkdownToJsonl.convert_markdown_maps_to_jsonl(entries)

            # Compress JSONL formatted Data
-            if output_file.suffix == ".gz":
-                compress_jsonl_data(jsonl_data, output_file)
-            elif output_file.suffix == ".jsonl":
-                dump_jsonl(jsonl_data, output_file)
+            compress_jsonl_data(jsonl_data, output_file)

        return entries_with_ids

--- a/src/khoj/processor/notion/notion_to_jsonl.py
+++ b/src/khoj/processor/notion/notion_to_jsonl.py
@@ -8,7 +8,7 @@ import requests
 from khoj.utils.helpers import timer
 from khoj.utils.rawconfig import Entry, NotionContentConfig
 from khoj.processor.text_to_jsonl import TextToJsonl
-from khoj.utils.jsonl import dump_jsonl, compress_jsonl_data
+from khoj.utils.jsonl import compress_jsonl_data
 from khoj.utils.rawconfig import Entry

 from enum import Enum
@@ -78,7 +78,9 @@ class NotionToJsonl(TextToJsonl):
            NotionBlockType.DIVIDER.value,
        ]

-    def process(self, previous_entries=None):
+        self.body_params = {"page_size": 100}
+
+    def process(self, previous_entries=[]):
        current_entries = []

        # Get all pages
@@ -88,13 +90,13 @@ class NotionToJsonl(TextToJsonl):
            while True:
                result = self.session.post(
                    "https://api.notion.com/v1/search",
-                    json={"page_size": 100},
+                    json=self.body_params,  # request body; carries "start_cursor" once a previous page reported more results
                ).json()
                responses.append(result)
                if result["has_more"] == False:
                    break
                else:
-                    self.session.params = {"start_cursor": responses[-1]["next_cursor"]}
+                    self.body_params.update({"start_cursor": result["next_cursor"]})  # NOTE(review): the cursor stays on self.body_params after the loop; if process() runs again on this instance the search would resume from it - confirm, or reset the cursor

        for response in responses:
            with timer("Processing response", logger=logger):
@@ -174,7 +176,8 @@ class NotionToJsonl(TextToJsonl):
        return f"\n<b>{heading}</b>\n"

    def process_nested_children(self, children, raw_content, block_type=None):
-        for child in children["results"]:
+        results = children["results"] if children.get("results") else []
+        for child in results:
            child_type = child.get("type")
            if child_type == None:
                continue
@@ -199,7 +202,11 @@ class NotionToJsonl(TextToJsonl):
        return raw_text

    def get_block_children(self, block_id):
-        return self.session.get(f"https://api.notion.com/v1/blocks/{block_id}/children").json()
+        try:  # a failure in the request or in JSON decoding is logged below instead of being raised
+            return self.session.get(f"https://api.notion.com/v1/blocks/{block_id}/children").json()
+        except Exception as e:
+            logger.error(f"Error getting children for block {block_id}: {e}")
+            return {}  # empty dict, i.e. a response without a "results" key

    def get_page(self, page_id):
        return self.session.get(f"https://api.notion.com/v1/pages/{page_id}").json()
@@ -215,19 +222,27 @@ class NotionToJsonl(TextToJsonl):
            logger.error(f"Error getting page {page_id}: {e}")
            return None, None
        properties = page["properties"]
-        title_field = "Title" if "Title" in properties else "title"
+        title_field = "title"
+        if "Title" in properties:
+            title_field = "Title"
+        elif "Name" in properties:
+            title_field = "Name"
+        elif "Page" in properties:
+            title_field = "Page"
+        elif "Event" in properties:
+            title_field = "Event"
+        elif title_field not in properties:
+            logger.error(f"Page {page_id} does not have a title field")
+            return None, None
        title = page["properties"][title_field]["title"][0]["text"]["content"]
        return title, content

    def update_entries_with_ids(self, current_entries, previous_entries):
        # Identify, mark and merge any new entries with previous entries
        with timer("Identify new or updated entries", logger):
-            if not previous_entries:
-                entries_with_ids = list(enumerate(current_entries))
-            else:
-                entries_with_ids = TextToJsonl.mark_entries_for_update(
-                    current_entries, previous_entries, key="compiled", logger=logger
-                )
+            entries_with_ids = TextToJsonl.mark_entries_for_update(
+                current_entries, previous_entries, key="compiled", logger=logger
+            )

        with timer("Write Notion entries to JSONL file", logger):
            # Process Each Entry from all Notion entries
@@ -235,9 +250,6 @@ class NotionToJsonl(TextToJsonl):
            jsonl_data = TextToJsonl.convert_text_maps_to_jsonl(entries)

            # Compress JSONL formatted Data
-            if self.config.compressed_jsonl.suffix == ".gz":
-                compress_jsonl_data(jsonl_data, self.config.compressed_jsonl)
-            elif self.config.compressed_jsonl.suffix == ".jsonl":
-                dump_jsonl(jsonl_data, self.config.compressed_jsonl)
+            compress_jsonl_data(jsonl_data, self.config.compressed_jsonl)

        return entries_with_ids
--- a/src/khoj/processor/org_mode/org_to_jsonl.py
+++ b/src/khoj/processor/org_mode/org_to_jsonl.py
@@ -8,7 +8,7 @@ from typing import Iterable, List
 from khoj.processor.org_mode import orgnode
 from khoj.processor.text_to_jsonl import TextToJsonl
 from khoj.utils.helpers import get_absolute_path, is_none_or_empty, timer
-from khoj.utils.jsonl import dump_jsonl, compress_jsonl_data
+from khoj.utils.jsonl import compress_jsonl_data
 from khoj.utils.rawconfig import Entry, TextContentConfig
 from khoj.utils import state

@@ -22,7 +22,7 @@ class OrgToJsonl(TextToJsonl):
        self.config = config

    # Define Functions
-    def process(self, previous_entries: List[Entry] = None):
+    def process(self, previous_entries: List[Entry] = []):
        # Extract required fields from config
        org_files, org_file_filter, output_file = (
            self.config.input_files,
@@ -51,9 +51,7 @@ class OrgToJsonl(TextToJsonl):
            current_entries = self.split_entries_by_max_tokens(current_entries, max_tokens=256)

        # Identify, mark and merge any new entries with previous entries
-        if not previous_entries:
-            entries_with_ids = list(enumerate(current_entries))
-        else:
+        with timer("Identify new or updated entries", logger):
            entries_with_ids = TextToJsonl.mark_entries_for_update(
                current_entries, previous_entries, key="compiled", logger=logger
            )
@@ -64,10 +62,7 @@ class OrgToJsonl(TextToJsonl):
            jsonl_data = self.convert_org_entries_to_jsonl(entries)

            # Compress JSONL formatted Data
-            if output_file.suffix == ".gz":
-                compress_jsonl_data(jsonl_data, output_file)
-            elif output_file.suffix == ".jsonl":
-                dump_jsonl(jsonl_data, output_file)
+            compress_jsonl_data(jsonl_data, output_file)

        return entries_with_ids

@@ -125,9 +120,13 @@ class OrgToJsonl(TextToJsonl):
                # Ignore title notes i.e notes with just headings and empty body
                continue

+            todo_str = f"{parsed_entry.todo} " if parsed_entry.todo else ""
            # Prepend filename as top heading to entry
            filename = Path(entry_to_file_map[parsed_entry]).stem
-            heading = f"* {filename}\n** {parsed_entry.heading}." if parsed_entry.heading else f"* {filename}."
+            if parsed_entry.heading:
+                heading = f"* {filename}\n** {todo_str}{parsed_entry.heading}."
+            else:
+                heading = f"* {filename}."

            compiled = heading
            if state.verbose > 2:
--- a/src/khoj/processor/pdf/pdf_to_jsonl.py
+++ b/src/khoj/processor/pdf/pdf_to_jsonl.py
@@ -10,7 +10,7 @@ from langchain.document_loaders import PyPDFLoader
 # Internal Packages
 from khoj.processor.text_to_jsonl import TextToJsonl
 from khoj.utils.helpers import get_absolute_path, is_none_or_empty, timer
-from khoj.utils.jsonl import dump_jsonl, compress_jsonl_data
+from khoj.utils.jsonl import compress_jsonl_data
 from khoj.utils.rawconfig import Entry


@@ -19,7 +19,7 @@ logger = logging.getLogger(__name__)

 class PdfToJsonl(TextToJsonl):
    # Define Functions
-    def process(self, previous_entries=None):
+    def process(self, previous_entries=[]):
        # Extract required fields from config
        pdf_files, pdf_file_filter, output_file = (
            self.config.input_files,
@@ -45,12 +45,9 @@ class PdfToJsonl(TextToJsonl):

        # Identify, mark and merge any new entries with previous entries
        with timer("Identify new or updated entries", logger):
-            if not previous_entries:
-                entries_with_ids = list(enumerate(current_entries))
-            else:
-                entries_with_ids = TextToJsonl.mark_entries_for_update(
-                    current_entries, previous_entries, key="compiled", logger=logger
-                )
+            entries_with_ids = TextToJsonl.mark_entries_for_update(
+                current_entries, previous_entries, key="compiled", logger=logger
+            )

        with timer("Write PDF entries to JSONL file", logger):
            # Process Each Entry from All Notes Files
@@ -58,10 +55,7 @@ class PdfToJsonl(TextToJsonl):
            jsonl_data = PdfToJsonl.convert_pdf_maps_to_jsonl(entries)

            # Compress JSONL formatted Data
-            if output_file.suffix == ".gz":
-                compress_jsonl_data(jsonl_data, output_file)
-            elif output_file.suffix == ".jsonl":
-                dump_jsonl(jsonl_data, output_file)
+            compress_jsonl_data(jsonl_data, output_file)

        return entries_with_ids

--- a/src/khoj/processor/text_to_jsonl.py
+++ b/src/khoj/processor/text_to_jsonl.py
@@ -17,7 +17,7 @@ class TextToJsonl(ABC):
        self.config = config

    @abstractmethod
-    def process(self, previous_entries: List[Entry] = None) -> List[Tuple[int, Entry]]:
+    def process(self, previous_entries: List[Entry] = []) -> List[Tuple[int, Entry]]:  # NOTE(review): a list default is created once and shared across calls; implementations must never mutate previous_entries in place - verify
        ...

    @staticmethod
@@ -78,16 +78,23 @@ class TextToJsonl(ABC):
            # All entries that exist in both current and previous sets are kept
            existing_entry_hashes = set(current_entry_hashes) & set(previous_entry_hashes)

+            # load new entries in the order in which they are processed for a stable sort
+            new_entries = [
+                (current_entry_hashes.index(entry_hash), hash_to_current_entries[entry_hash])
+                for entry_hash in new_entry_hashes
+            ]
+            new_entries_sorted = sorted(new_entries, key=lambda e: e[0])
            # Mark new entries with -1 id to flag for later embeddings generation
-            new_entries = [(-1, hash_to_current_entries[entry_hash]) for entry_hash in new_entry_hashes]
+            new_entries_sorted = [(-1, entry[1]) for entry in new_entries_sorted]
+
            # Set id of existing entries to their previous ids to reuse their existing encoded embeddings
            existing_entries = [
                (previous_entry_hashes.index(entry_hash), hash_to_previous_entries[entry_hash])
                for entry_hash in existing_entry_hashes
            ]
-
            existing_entries_sorted = sorted(existing_entries, key=lambda e: e[0])
-            entries_with_ids = existing_entries_sorted + new_entries
+
+            entries_with_ids = existing_entries_sorted + new_entries_sorted

        return entries_with_ids

--- a/src/khoj/routers/api.py
+++ b/src/khoj/routers/api.py
@@ -5,20 +5,20 @@ import time
 import yaml
 import logging
 import json
-from typing import List, Optional, Union
+from typing import Iterable, List, Optional, Union

 # External Packages
 from fastapi import APIRouter, HTTPException, Header, Request
 from sentence_transformers import util

 # Internal Packages
-from khoj.configure import configure_processor, configure_search
+from khoj.configure import configure_processor, configure_server
 from khoj.search_type import image_search, text_search
 from khoj.search_filter.date_filter import DateFilter
 from khoj.search_filter.file_filter import FileFilter
 from khoj.search_filter.word_filter import WordFilter
 from khoj.utils.config import TextSearchModel
-from khoj.utils.helpers import log_telemetry, timer
+from khoj.utils.helpers import timer
 from khoj.utils.rawconfig import (
    ContentConfig,
    FullConfig,
@@ -34,7 +34,7 @@ from khoj.utils.state import SearchType
 from khoj.utils import state, constants
 from khoj.utils.yaml import save_config_to_file_updated_state
 from fastapi.responses import StreamingResponse, Response
-from khoj.routers.helpers import perform_chat_checks, generate_chat_response
+from khoj.routers.helpers import perform_chat_checks, generate_chat_response, update_telemetry_state
 from khoj.processor.conversation.gpt import extract_questions
 from fastapi.requests import Request

@@ -56,15 +56,44 @@ if not state.demo:
        return state.config

    @api.post("/config/data")
-    async def set_config_data(updated_config: FullConfig):
+    async def set_config_data(
+        request: Request,
+        updated_config: FullConfig,
+        client: Optional[str] = None,  # optional caller identifier; only forwarded to the telemetry call below
+    ):
        state.config = updated_config
        with open(state.config_file, "w") as outfile:
            yaml.dump(yaml.safe_load(state.config.json(by_alias=True)), outfile)
            outfile.close()
+
+        configuration_update_metadata = dict()  # section name -> bool: whether that section is set in the new config
+
+        if state.config.content_type is not None:  # no content flags are recorded when content_type is unset
+            configuration_update_metadata["github"] = state.config.content_type.github is not None
+            configuration_update_metadata["notion"] = state.config.content_type.notion is not None
+            configuration_update_metadata["org"] = state.config.content_type.org is not None
+            configuration_update_metadata["pdf"] = state.config.content_type.pdf is not None
+            configuration_update_metadata["markdown"] = state.config.content_type.markdown is not None
+            configuration_update_metadata["plugins"] = state.config.content_type.plugins is not None
+
+        if state.config.processor is not None:
+            configuration_update_metadata["conversation_processor"] = state.config.processor.conversation is not None
+
+        update_telemetry_state(  # passes only the boolean flags built above as metadata, not the config values
+            request=request,
+            telemetry_type="api",
+            api="set_config",
+            client=client,
+            metadata=configuration_update_metadata,
+        )
        return state.config

    @api.post("/config/data/content_type/github", status_code=200)
-    async def set_content_config_github_data(updated_config: Union[GithubContentConfig, None]):
+    async def set_content_config_github_data(
+        request: Request,
+        updated_config: Union[GithubContentConfig, None],
+        client: Optional[str] = None,
+    ):
        _initialize_config()

        if not state.config.content_type:
@@ -72,6 +101,14 @@ if not state.demo:
        else:
            state.config.content_type.github = updated_config

+        update_telemetry_state(
+            request=request,
+            telemetry_type="api",
+            api="set_content_config",
+            client=client,
+            metadata={"content_type": "github"},
+        )
+
        try:
            save_config_to_file_updated_state()
            return {"status": "ok"}
@@ -79,7 +116,11 @@ if not state.demo:
            return {"status": "error", "message": str(e)}

    @api.post("/config/data/content_type/notion", status_code=200)
-    async def set_content_config_notion_data(updated_config: Union[NotionContentConfig, None]):
+    async def set_content_config_notion_data(
+        request: Request,
+        updated_config: Union[NotionContentConfig, None],
+        client: Optional[str] = None,
+    ):
        _initialize_config()

        if not state.config.content_type:
@@ -87,6 +128,14 @@ if not state.demo:
        else:
            state.config.content_type.notion = updated_config

+        update_telemetry_state(
+            request=request,
+            telemetry_type="api",
+            api="set_content_config",
+            client=client,
+            metadata={"content_type": "notion"},
+        )
+
        try:
            save_config_to_file_updated_state()
            return {"status": "ok"}
@@ -94,25 +143,37 @@ if not state.demo:
            return {"status": "error", "message": str(e)}

    @api.post("/delete/config/data/content_type/{content_type}", status_code=200)
-    async def remove_content_config_data(content_type: str):
+    async def remove_content_config_data(
+        request: Request,
+        content_type: str,
+        client: Optional[str] = None,
+    ):
        if not state.config or not state.config.content_type:
            return {"status": "ok"}

+        update_telemetry_state(
+            request=request,
+            telemetry_type="api",
+            api="delete_content_config",
+            client=client,
+            metadata={"content_type": content_type},
+        )
+
        if state.config.content_type:
            state.config.content_type[content_type] = None

        if content_type == "github":
-            state.model.github_search = None
+            state.content_index.github = None
        elif content_type == "notion":
-            state.model.notion_search = None
+            state.content_index.notion = None
        elif content_type == "plugins":
-            state.model.plugin_search = None
+            state.content_index.plugins = None
        elif content_type == "pdf":
-            state.model.pdf_search = None
+            state.content_index.pdf = None
        elif content_type == "markdown":
-            state.model.markdown_search = None
+            state.content_index.markdown = None
        elif content_type == "org":
-            state.model.org_search = None
+            state.content_index.org = None

        try:
            save_config_to_file_updated_state()
@@ -121,12 +182,23 @@ if not state.demo:
            return {"status": "error", "message": str(e)}

    @api.post("/delete/config/data/processor/conversation", status_code=200)
-    async def remove_processor_conversation_config_data():
+    async def remove_processor_conversation_config_data(
+        request: Request,
+        client: Optional[str] = None,
+    ):
        if not state.config or not state.config.processor or not state.config.processor.conversation:
            return {"status": "ok"}

        state.config.processor.conversation = None

+        update_telemetry_state(
+            request=request,
+            telemetry_type="api",
+            api="delete_processor_config",
+            client=client,
+            metadata={"processor_type": "conversation"},
+        )
+
        try:
            save_config_to_file_updated_state()
            return {"status": "ok"}
@@ -134,7 +206,12 @@ if not state.demo:
            return {"status": "error", "message": str(e)}

    @api.post("/config/data/content_type/{content_type}", status_code=200)
-    async def set_content_config_data(content_type: str, updated_config: Union[TextContentConfig, None]):
+    async def set_content_config_data(
+        request: Request,
+        content_type: str,
+        updated_config: Union[TextContentConfig, None],
+        client: Optional[str] = None,
+    ):
        _initialize_config()

        if not state.config.content_type:
@@ -142,6 +219,14 @@ if not state.demo:
        else:
            state.config.content_type[content_type] = updated_config

+        update_telemetry_state(
+            request=request,
+            telemetry_type="api",
+            api="set_content_config",
+            client=client,
+            metadata={"content_type": content_type},
+        )
+
        try:
            save_config_to_file_updated_state()
            return {"status": "ok"}
@@ -149,11 +234,24 @@ if not state.demo:
            return {"status": "error", "message": str(e)}

    @api.post("/config/data/processor/conversation", status_code=200)
-    async def set_processor_conversation_config_data(updated_config: Union[ConversationProcessorConfig, None]):
+    async def set_processor_conversation_config_data(
+        request: Request,
+        updated_config: Union[ConversationProcessorConfig, None],
+        client: Optional[str] = None,  # optional caller identifier; only forwarded to the telemetry call below
+    ):
        _initialize_config()

        state.config.processor = ProcessorConfig(conversation=updated_config)
        state.processor_config = configure_processor(state.config.processor)
+
+        update_telemetry_state(
+            request=request,
+            telemetry_type="api",
+            api="set_processor_config",  # processor update: was mislabelled "set_content_config"; now mirrors "delete_processor_config"
+            client=client,
+            metadata={"processor_type": "conversation"},
+        )
+
        try:
            save_config_to_file_updated_state()
            return {"status": "ok"}
@@ -182,7 +280,7 @@ def get_config_types():
        for search_type in SearchType
        if (
            search_type.value in configured_content_types
-            and getattr(state.model, f"{search_type.value}_search") is not None
+            and getattr(state.content_index, search_type.value) is not None
        )
        or ("plugins" in configured_content_types and search_type.name in configured_content_types["plugins"])
        or search_type == SearchType.All
@@ -210,7 +308,7 @@ async def search(
    if q is None or q == "":
        logger.warning(f"No query param (q) passed in API call to initiate search")
        return results
-    if not state.model or not any(state.model.__dict__.values()):
+    if not state.search_models or not any(state.search_models.__dict__.values()):
        logger.warning(f"No search models loaded. Configure a search model before initiating search")
        return results

@@ -234,7 +332,7 @@ async def search(
    encoded_asymmetric_query = None
    if t == SearchType.All or t != SearchType.Image:
        text_search_models: List[TextSearchModel] = [
-            model for model in state.model.__dict__.values() if isinstance(model, TextSearchModel)
+            model for model in state.search_models.__dict__.values() if isinstance(model, TextSearchModel)
        ]
        if text_search_models:
            with timer("Encoding query took", logger=logger):
@@ -247,13 +345,14 @@ async def search(
                )

    with concurrent.futures.ThreadPoolExecutor() as executor:
-        if (t == SearchType.Org or t == SearchType.All) and state.model.org_search:
+        if (t == SearchType.Org or t == SearchType.All) and state.content_index.org and state.search_models.text_search:
            # query org-mode notes
            search_futures += [
                executor.submit(
                    text_search.query,
                    user_query,
-                    state.model.org_search,
+                    state.search_models.text_search,
+                    state.content_index.org,
                    question_embedding=encoded_asymmetric_query,
                    rank_results=r or False,
                    score_threshold=score_threshold,
@@ -261,13 +360,18 @@ async def search(
                )
            ]

-        if (t == SearchType.Markdown or t == SearchType.All) and state.model.markdown_search:
+        if (
+            (t == SearchType.Markdown or t == SearchType.All)
+            and state.content_index.markdown
+            and state.search_models.text_search
+        ):
            # query markdown notes
            search_futures += [
                executor.submit(
                    text_search.query,
                    user_query,
-                    state.model.markdown_search,
+                    state.search_models.text_search,
+                    state.content_index.markdown,
                    question_embedding=encoded_asymmetric_query,
                    rank_results=r or False,
                    score_threshold=score_threshold,
@@ -275,13 +379,18 @@ async def search(
                )
            ]

-        if (t == SearchType.Github or t == SearchType.All) and state.model.github_search:
+        if (
+            (t == SearchType.Github or t == SearchType.All)
+            and state.content_index.github
+            and state.search_models.text_search
+        ):
            # query github issues
            search_futures += [
                executor.submit(
                    text_search.query,
                    user_query,
-                    state.model.github_search,
+                    state.search_models.text_search,
+                    state.content_index.github,
                    question_embedding=encoded_asymmetric_query,
                    rank_results=r or False,
                    score_threshold=score_threshold,
@@ -289,13 +398,14 @@ async def search(
                )
            ]

-        if (t == SearchType.Pdf or t == SearchType.All) and state.model.pdf_search:
+        if (t == SearchType.Pdf or t == SearchType.All) and state.content_index.pdf and state.search_models.text_search:
            # query pdf files
            search_futures += [
                executor.submit(
                    text_search.query,
                    user_query,
-                    state.model.pdf_search,
+                    state.search_models.text_search,
+                    state.content_index.pdf,
                    question_embedding=encoded_asymmetric_query,
                    rank_results=r or False,
                    score_threshold=score_threshold,
@@ -303,26 +413,38 @@ async def search(
                )
            ]

-        if (t == SearchType.Image) and state.model.image_search:
+        if (t == SearchType.Image) and state.content_index.image and state.search_models.image_search:
            # query images
            search_futures += [
                executor.submit(
                    image_search.query,
                    user_query,
                    results_count,
-                    state.model.image_search,
+                    state.search_models.image_search,
+                    state.content_index.image,
                    score_threshold=score_threshold,
                )
            ]

-        if (t == SearchType.All or t in SearchType) and state.model.plugin_search:
+        if (
+            (t == SearchType.All or t in SearchType)
+            and state.content_index.plugins
+            and state.search_models.plugin_search
+        ):
            # query specified plugin type
+            # Get plugin content, search model for specified search type, or the first one if none specified
+            plugin_search = state.search_models.plugin_search.get(t.value) or next(
+                iter(state.search_models.plugin_search.values())
+            )
+            plugin_content = state.content_index.plugins.get(t.value) or next(
+                iter(state.content_index.plugins.values())
+            )
            search_futures += [
                executor.submit(
                    text_search.query,
                    user_query,
-                    # Get plugin search model for specified search type, or the first one if none specified
-                    state.model.plugin_search.get(t.value) or next(iter(state.model.plugin_search.values())),
+                    plugin_search,
+                    plugin_content,
                    question_embedding=encoded_asymmetric_query,
                    rank_results=r or False,
                    score_threshold=score_threshold,
@@ -330,13 +452,18 @@ async def search(
                )
            ]

-        if (t == SearchType.Notion or t == SearchType.All) and state.model.notion_search:
+        if (
+            (t == SearchType.Notion or t == SearchType.All)
+            and state.content_index.notion
+            and state.search_models.text_search
+        ):
            # query notion pages
            search_futures += [
                executor.submit(
                    text_search.query,
                    user_query,
-                    state.model.notion_search,
+                    state.search_models.text_search,
+                    state.content_index.notion,
                    question_embedding=encoded_asymmetric_query,
                    rank_results=r or False,
                    score_threshold=score_threshold,
@@ -347,13 +474,13 @@ async def search(
        # Query across each requested content types in parallel
        with timer("Query took", logger):
            for search_future in concurrent.futures.as_completed(search_futures):
-                if t == SearchType.Image:
+                if t == SearchType.Image and state.content_index.image:
                    hits = await search_future.result()
                    output_directory = constants.web_directory / "images"
                    # Collate results
                    results += image_search.collate_results(
                        hits,
-                        image_names=state.model.image_search.image_names,
+                        image_names=state.content_index.image.image_names,
                        output_directory=output_directory,
                        image_files_url="/static/images",
                        count=results_count,
@@ -369,20 +496,16 @@ async def search(
    # Cache results
    state.query_cache[query_cache_key] = results

-    user_state = {
-        "client_host": request.client.host if request.client else "unknown",
-        "user_agent": user_agent or "unknown",
-        "referer": referer or "unknown",
-        "host": host or "unknown",
-    }
+    update_telemetry_state(
+        request=request,
+        telemetry_type="api",
+        api="search",
+        client=client,
+        user_agent=user_agent,
+        referer=referer,
+        host=host,
+    )

-    # Only log telemetry if query is new and not a continuation of previous query
-    if state.previous_query is None or state.previous_query not in user_query:
-        state.telemetry += [
-            log_telemetry(
-                telemetry_type="api", api="search", client=client, app_config=state.config.app, properties=user_state
-            )
-        ]
    state.previous_query = user_query

    end_time = time.time()
@@ -401,42 +524,36 @@ def update(
    referer: Optional[str] = Header(None),
    host: Optional[str] = Header(None),
 ):
+    if not state.config:
+        error_msg = f"🚨 Khoj is not configured.\nConfigure it via http://localhost:42110/config, plugins or by editing {state.config_file}."
+        logger.warning(error_msg)
+        raise HTTPException(status_code=500, detail=error_msg)
    try:
-        state.search_index_lock.acquire()
-        try:
-            state.model = configure_search(state.model, state.config, regenerate=force or False, t=t)
-        except Exception as e:
-            logger.error(e)
-            raise HTTPException(status_code=500, detail=str(e))
-        finally:
-            state.search_index_lock.release()
-    except ValueError as e:
-        logger.error(e)
-        raise HTTPException(status_code=500, detail=str(e))
+        configure_server(state.config, regenerate=force or False, search_type=t)
+    except Exception as e:
+        error_msg = f"🚨 Failed to update server via API: {e}"
+        logger.error(error_msg, exc_info=True)
+        raise HTTPException(status_code=500, detail=error_msg)
    else:
-        logger.info("📬 Search index updated via API")
+        components = []
+        if state.search_models:
+            components.append("Search models")
+        if state.content_index:
+            components.append("Content index")
+        if state.processor_config:
+            components.append("Conversation processor")
+        components_msg = ", ".join(components)
+        logger.info(f"📬 {components_msg} updated via API")

-    try:
-        if state.config and state.config.processor:
-            state.processor_config = configure_processor(state.config.processor)
-    except ValueError as e:
-        logger.error(e)
-        raise HTTPException(status_code=500, detail=str(e))
-    else:
-        logger.info("📬 Processor reconfigured via API")
-
-    user_state = {
-        "client_host": request.client.host if request.client else None,
-        "user_agent": user_agent or "unknown",
-        "referer": referer or "unknown",
-        "host": host or "unknown",
-    }
-
-    state.telemetry += [
-        log_telemetry(
-            telemetry_type="api", api="update", client=client, app_config=state.config.app, properties=user_state
-        )
-    ]
+    update_telemetry_state(
+        request=request,
+        telemetry_type="api",
+        api="update",
+        client=client,
+        user_agent=user_agent,
+        referer=referer,
+        host=host,
+    )

    return {"status": "ok", "message": "khoj reloaded"}

@@ -454,18 +571,15 @@ def chat_history(
    # Load Conversation History
    meta_log = state.processor_config.conversation.meta_log

-    user_state = {
-        "client_host": request.client.host if request.client else None,
-        "user_agent": user_agent or "unknown",
-        "referer": referer or "unknown",
-        "host": host or "unknown",
-    }
-
-    state.telemetry += [
-        log_telemetry(
-            telemetry_type="api", api="chat", client=client, app_config=state.config.app, properties=user_state
-        )
-    ]
+    update_telemetry_state(
+        request=request,
+        telemetry_type="api",
+        api="chat",
+        client=client,
+        user_agent=user_agent,
+        referer=referer,
+        host=host,
+    )

    return {"status": "ok", "response": meta_log.get("chat", [])}

@@ -509,18 +623,15 @@ async def chat(

    response_obj = {"response": actual_response, "context": compiled_references}

-    user_state = {
-        "client_host": request.client.host if request.client else None,
-        "user_agent": user_agent or "unknown",
-        "referer": referer or "unknown",
-        "host": host or "unknown",
-    }
-
-    state.telemetry += [
-        log_telemetry(
-            telemetry_type="api", api="chat", client=client, app_config=state.config.app, properties=user_state
-        )
-    ]
+    update_telemetry_state(
+        request=request,
+        telemetry_type="api",
+        api="chat",
+        client=client,
+        user_agent=user_agent,
+        referer=referer,
+        host=host,
+    )

    return Response(content=json.dumps(response_obj), media_type="application/json", status_code=200)

--- a/src/khoj/routers/helpers.py
+++ b/src/khoj/routers/helpers.py
@@ -1,11 +1,12 @@
-from fastapi import HTTPException
 import logging
 from datetime import datetime
 from functools import partial
-from typing import List
+from typing import List, Optional
+
+from fastapi import HTTPException, Request

 from khoj.utils import state
-from khoj.utils.helpers import timer
+from khoj.utils.helpers import timer, log_telemetry
 from khoj.processor.conversation.gpt import converse
 from khoj.processor.conversation.utils import message_to_log, reciprocal_conversation_to_chatml

@@ -24,6 +25,33 @@ def perform_chat_checks():
        )


+def update_telemetry_state(
+    request: Request,
+    telemetry_type: str,
+    api: str,
+    client: Optional[str] = None,
+    user_agent: Optional[str] = None,
+    referer: Optional[str] = None,
+    host: Optional[str] = None,
+    metadata: Optional[dict] = None,
+):
+    "Append a telemetry event for this API call, tagged with request details, to state.telemetry"
+    user_state = {
+        "client_host": request.client.host if request.client else None,
+        "user_agent": user_agent or "unknown",
+        "referer": referer or "unknown",
+        "host": host or "unknown",
+    }
+    if metadata:
+        user_state.update(metadata)  # caller supplied properties override the defaults above
+    app_config = state.config.app if state.config else None  # state.config is unset until khoj is configured
+    state.telemetry += [
+        log_telemetry(
+            telemetry_type=telemetry_type, api=api, client=client, app_config=app_config, properties=user_state
+        )
+    ]
+
+
 def generate_chat_response(
    q: str,
    meta_log: dict,
--- a/src/khoj/routers/web_client.py
+++ b/src/khoj/routers/web_client.py
@@ -39,7 +39,44 @@ if not state.demo:
            processor=None,
        )
        current_config = state.config or json.loads(default_full_config.json())
-        return templates.TemplateResponse("config.html", context={"request": request, "current_config": current_config})
+
+        successfully_configured = {
+            "pdf": False,
+            "markdown": False,
+            "org": False,
+            "image": False,
+            "github": False,
+            "notion": False,
+            "conversation": False,
+        }
+
+        if state.content_index:
+            successfully_configured.update(
+                {
+                    "pdf": state.content_index.pdf is not None,
+                    "markdown": state.content_index.markdown is not None,
+                    "org": state.content_index.org is not None,
+                    "image": state.content_index.image is not None,
+                    "github": state.content_index.github is not None,
+                    "notion": state.content_index.notion is not None,
+                }
+            )
+
+        if state.processor_config:
+            successfully_configured.update(
+                {
+                    "conversation": state.processor_config.conversation is not None,
+                }
+            )
+
+        return templates.TemplateResponse(
+            "config.html",
+            context={
+                "request": request,
+                "current_config": current_config,
+                "current_model_state": successfully_configured,
+            },
+        )

    @web_client.get("/config/content_type/github", response_class=HTMLResponse)
    def github_config_page(request: Request):
--- a/src/khoj/search_type/image_search.py
+++ b/src/khoj/search_type/image_search.py
@@ -12,10 +12,12 @@ from sentence_transformers import SentenceTransformer, util
 from PIL import Image
 from tqdm import trange
 import torch
+from khoj.utils import state

 # Internal Packages
 from khoj.utils.helpers import get_absolute_path, get_from_dict, resolve_absolute_path, load_model, timer
-from khoj.utils.config import ImageSearchModel
+from khoj.utils.config import ImageContent, ImageSearchModel
+from khoj.utils.models import BaseEncoder
 from khoj.utils.rawconfig import ImageContentConfig, ImageSearchConfig, SearchResponse


@@ -40,7 +42,7 @@ def initialize_model(search_config: ImageSearchConfig):
        model_type=search_config.encoder_type or SentenceTransformer,
    )

-    return encoder
+    return ImageSearchModel(encoder)


 def extract_entries(image_directories):
@@ -143,7 +145,9 @@ def extract_metadata(image_name):
    return image_processed_metadata


-async def query(raw_query, count, model: ImageSearchModel, score_threshold: float = -math.inf):
+async def query(
+    raw_query, count, search_model: ImageSearchModel, content: ImageContent, score_threshold: float = -math.inf
+):
    # Set query to image content if query is of form file:/path/to/file.png
    if raw_query.startswith("file:") and pathlib.Path(raw_query[5:]).is_file():
        query_imagepath = resolve_absolute_path(pathlib.Path(raw_query[5:]), strict=True)
@@ -158,21 +162,21 @@ async def query(raw_query, count, model: ImageSearchModel, score_threshold: floa

    # Now we encode the query (which can either be an image or a text string)
    with timer("Query Encode Time", logger):
-        query_embedding = model.image_encoder.encode([query], convert_to_tensor=True, show_progress_bar=False)
+        query_embedding = search_model.image_encoder.encode([query], convert_to_tensor=True, show_progress_bar=False)

    # Compute top_k ranked images based on cosine-similarity b/w query and all image embeddings.
    with timer("Search Time", logger):
        image_hits = {
            result["corpus_id"]: {"image_score": result["score"], "score": result["score"]}
-            for result in util.semantic_search(query_embedding, model.image_embeddings, top_k=count)[0]
+            for result in util.semantic_search(query_embedding, content.image_embeddings, top_k=count)[0]
        }

    # Compute top_k ranked images based on cosine-similarity b/w query and all image metadata embeddings.
-    if model.image_metadata_embeddings:
+    if content.image_metadata_embeddings:
        with timer("Metadata Search Time", logger):
            metadata_hits = {
                result["corpus_id"]: result["score"]
-                for result in util.semantic_search(query_embedding, model.image_metadata_embeddings, top_k=count)[0]
+                for result in util.semantic_search(query_embedding, content.image_metadata_embeddings, top_k=count)[0]
            }

        # Sum metadata, image scores of the highest ranked images
@@ -239,10 +243,7 @@ def collate_results(hits, image_names, output_directory, image_files_url, count=
    return results


-def setup(config: ImageContentConfig, search_config: ImageSearchConfig, regenerate: bool) -> ImageSearchModel:
-    # Initialize Model
-    encoder = initialize_model(search_config)
-
+def setup(config: ImageContentConfig, encoder: BaseEncoder, regenerate: bool) -> ImageContent:
    # Extract Entries
    absolute_image_files, filtered_image_files = set(), set()
    if config.input_directories:
@@ -268,4 +269,4 @@ def setup(config: ImageContentConfig, search_config: ImageSearchConfig, regenera
        use_xmp_metadata=config.use_xmp_metadata,
    )

-    return ImageSearchModel(all_image_files, image_embeddings, image_metadata_embeddings, encoder)
+    return ImageContent(all_image_files, image_embeddings, image_metadata_embeddings)
--- a/src/khoj/search_type/text_search.py
+++ b/src/khoj/search_type/text_search.py
@@ -13,7 +13,7 @@ from khoj.search_filter.base_filter import BaseFilter
 # Internal Packages
 from khoj.utils import state
 from khoj.utils.helpers import get_absolute_path, is_none_or_empty, resolve_absolute_path, load_model, timer
-from khoj.utils.config import TextSearchModel
+from khoj.utils.config import TextContent, TextSearchModel
 from khoj.utils.models import BaseEncoder
 from khoj.utils.rawconfig import SearchResponse, TextSearchConfig, TextConfigBase, Entry
 from khoj.utils.jsonl import load_jsonl
@@ -26,9 +26,6 @@ def initialize_model(search_config: TextSearchConfig):
    "Initialize model for semantic search on text"
    torch.set_num_threads(4)

-    # Number of entries we want to retrieve with the bi-encoder
-    top_k = 15
-
    # If model directory is configured
    if search_config.model_directory:
        # Convert model directory to absolute path
@@ -52,7 +49,7 @@ def initialize_model(search_config: TextSearchConfig):
        device=f"{state.device}",
    )

-    return bi_encoder, cross_encoder, top_k
+    return TextSearchModel(bi_encoder, cross_encoder)


 def extract_entries(jsonl_file) -> List[Entry]:
@@ -61,60 +58,66 @@ def extract_entries(jsonl_file) -> List[Entry]:


 def compute_embeddings(
-    entries_with_ids: List[Tuple[int, Entry]], bi_encoder: BaseEncoder, embeddings_file: Path, regenerate=False
+    entries_with_ids: List[Tuple[int, Entry]],
+    bi_encoder: BaseEncoder,
+    embeddings_file: Path,
+    regenerate=False,
+    normalize=True,
 ):
     "Compute (and Save) Embeddings or Load Pre-Computed Embeddings"
-    new_entries = []
+    new_embeddings = torch.tensor([], device=state.device)
+    existing_embeddings = torch.tensor([], device=state.device)
+    create_index_msg = ""
     # Load pre-computed embeddings from file if exists and update them if required
     if embeddings_file.exists() and not regenerate:
-        corpus_embeddings = torch.load(get_absolute_path(embeddings_file), map_location=state.device)
+        corpus_embeddings: torch.Tensor = torch.load(get_absolute_path(embeddings_file), map_location=state.device)
         logger.debug(f"Loaded {len(corpus_embeddings)} text embeddings from {embeddings_file}")
-
-        # Encode any new entries in the corpus and update corpus embeddings
-        new_entries = [entry.compiled for id, entry in entries_with_ids if id == -1]
-        if new_entries:
-            logger.info(f"📩 Indexing {len(new_entries)} text entries.")
-            new_embeddings = bi_encoder.encode(
-                new_entries, convert_to_tensor=True, device=state.device, show_progress_bar=True
-            )
-            existing_entry_ids = [id for id, _ in entries_with_ids if id != -1]
-            if existing_entry_ids:
-                existing_embeddings = torch.index_select(
-                    corpus_embeddings, 0, torch.tensor(existing_entry_ids, device=state.device)
-                )
-            else:
-                existing_embeddings = torch.tensor([], device=state.device)
-            corpus_embeddings = torch.cat([existing_embeddings, new_embeddings], dim=0)
-    # Else compute the corpus embeddings from scratch
     else:
-        new_entries = [entry.compiled for _, entry in entries_with_ids]
-        logger.info(f"📩 Indexing {len(new_entries)} text entries. Creating index from scratch.")
-        corpus_embeddings = bi_encoder.encode(
+        corpus_embeddings = torch.tensor([], device=state.device)
+        create_index_msg = " Creating index from scratch."
+        entries_with_ids = [(-1, entry) for _, entry in entries_with_ids]  # No stored embeddings to reuse; encode all
+
+    # Encode any new entries in the corpus and update corpus embeddings
+    new_entries = [entry.compiled for id, entry in entries_with_ids if id == -1]
+    if new_entries:
+        logger.info(f"📩 Indexing {len(new_entries)} text entries.{create_index_msg}")
+        new_embeddings = bi_encoder.encode(
             new_entries, convert_to_tensor=True, device=state.device, show_progress_bar=True
         )

-    # Save regenerated or updated embeddings to file
-    if new_entries:
+    # Extract existing embeddings from previous corpus embeddings
+    existing_entry_ids = [id for id, _ in entries_with_ids if id != -1]
+    if existing_entry_ids:
+        existing_ids = torch.tensor(existing_entry_ids, device=state.device)
+        existing_embeddings = torch.index_select(corpus_embeddings, 0, existing_ids)
+
+    # Set corpus embeddings to merger of existing and new embeddings
+    corpus_embeddings = torch.cat([existing_embeddings, new_embeddings], dim=0)
+    if normalize:
+        # Normalize embeddings for faster lookup via dot product when querying
         corpus_embeddings = util.normalize_embeddings(corpus_embeddings)
-        torch.save(corpus_embeddings, embeddings_file)
-        logger.info(f"📩 Saved computed text embeddings to {embeddings_file}")
+
+    # Save regenerated or updated embeddings to file
+    torch.save(corpus_embeddings, embeddings_file)
+    logger.info(f"📩 Saved computed text embeddings to {embeddings_file}")

     return corpus_embeddings


 async def query(
    raw_query: str,
-    model: TextSearchModel,
+    search_model: TextSearchModel,
+    content: TextContent,
    question_embedding: Union[torch.Tensor, None] = None,
    rank_results: bool = False,
    score_threshold: float = -math.inf,
    dedupe: bool = True,
 ) -> Tuple[List[dict], List[Entry]]:
    "Search for entries that answer the query"
-    query, entries, corpus_embeddings = raw_query, model.entries, model.corpus_embeddings
+    query, entries, corpus_embeddings = raw_query, content.entries, content.corpus_embeddings

    # Filter query, entries and embeddings before semantic search
-    query, entries, corpus_embeddings = apply_filters(query, entries, corpus_embeddings, model.filters)
+    query, entries, corpus_embeddings = apply_filters(query, entries, corpus_embeddings, content.filters)

    # If no entries left after filtering, return empty results
    if entries is None or len(entries) == 0:
@@ -127,18 +130,17 @@ async def query(
    # Encode the query using the bi-encoder
    if question_embedding is None:
        with timer("Query Encode Time", logger, state.device):
-            question_embedding = model.bi_encoder.encode([query], convert_to_tensor=True, device=state.device)
+            question_embedding = search_model.bi_encoder.encode([query], convert_to_tensor=True, device=state.device)
            question_embedding = util.normalize_embeddings(question_embedding)

    # Find relevant entries for the query
+    top_k = min(len(entries), search_model.top_k or 10)  # top_k hits can't be more than the total entries in corpus
    with timer("Search Time", logger, state.device):
-        hits = util.semantic_search(
-            question_embedding, corpus_embeddings, top_k=model.top_k, score_function=util.dot_score
-        )[0]
+        hits = util.semantic_search(question_embedding, corpus_embeddings, top_k, score_function=util.dot_score)[0]

    # Score all retrieved entries using the cross-encoder
-    if rank_results:
-        hits = cross_encoder_score(model.cross_encoder, query, entries, hits)
+    if rank_results and search_model.cross_encoder:
+        hits = cross_encoder_score(search_model.cross_encoder, query, entries, hits)

    # Filter results by score threshold
    hits = [hit for hit in hits if hit.get("cross-score", hit.get("score")) >= score_threshold]
@@ -173,37 +175,34 @@ def collate_results(hits, entries: List[Entry], count=5) -> List[SearchResponse]
 def setup(
    text_to_jsonl: Type[TextToJsonl],
    config: TextConfigBase,
-    search_config: TextSearchConfig,
+    bi_encoder: BaseEncoder,
    regenerate: bool,
    filters: List[BaseFilter] = [],
-) -> TextSearchModel:
-    # Initialize Model
-    bi_encoder, cross_encoder, top_k = initialize_model(search_config)
-
+    normalize: bool = True,
+) -> TextContent:
    # Map notes in text files to (compressed) JSONL formatted file
    config.compressed_jsonl = resolve_absolute_path(config.compressed_jsonl)
-    previous_entries = (
-        extract_entries(config.compressed_jsonl) if config.compressed_jsonl.exists() and not regenerate else None
-    )
-    entries_with_indices = text_to_jsonl(config).process(previous_entries or [])
+    previous_entries = []
+    if config.compressed_jsonl.exists() and not regenerate:
+        previous_entries = extract_entries(config.compressed_jsonl)
+    entries_with_indices = text_to_jsonl(config).process(previous_entries)

    # Extract Updated Entries
    entries = extract_entries(config.compressed_jsonl)
    if is_none_or_empty(entries):
        config_params = ", ".join([f"{key}={value}" for key, value in config.dict().items()])
        raise ValueError(f"No valid entries found in specified files: {config_params}")
-    top_k = min(len(entries), top_k)  # top_k hits can't be more than the total entries in corpus

    # Compute or Load Embeddings
    config.embeddings_file = resolve_absolute_path(config.embeddings_file)
    corpus_embeddings = compute_embeddings(
-        entries_with_indices, bi_encoder, config.embeddings_file, regenerate=regenerate
+        entries_with_indices, bi_encoder, config.embeddings_file, regenerate=regenerate, normalize=normalize
    )

    for filter in filters:
        filter.load(entries, regenerate=regenerate)

-    return TextSearchModel(entries, corpus_embeddings, bi_encoder, cross_encoder, filters, top_k)
+    return TextContent(entries, corpus_embeddings, filters)


 def apply_filters(
--- a/src/khoj/utils/cli.py
+++ b/src/khoj/utils/cli.py
@@ -5,7 +5,7 @@ from importlib.metadata import version

 # Internal Packages
 from khoj.utils.helpers import resolve_absolute_path
-from khoj.utils.yaml import parse_config_from_file
+from khoj.utils.yaml import load_config_from_file, parse_config_from_file, save_config_to_file


 def cli(args=None):
@@ -23,7 +23,7 @@ def cli(args=None):
    )
    parser.add_argument("--verbose", "-v", action="count", default=0, help="Show verbose conversion logs. Default: 0")
    parser.add_argument("--host", type=str, default="127.0.0.1", help="Host address of the server. Default: 127.0.0.1")
-    parser.add_argument("--port", "-p", type=int, default=8000, help="Port of the server. Default: 8000")
+    parser.add_argument("--port", "-p", type=int, default=42110, help="Port of the server. Default: 42110")
    parser.add_argument(
        "--socket",
        type=pathlib.Path,
@@ -34,9 +34,10 @@ def cli(args=None):

    args = parser.parse_args(args)

+    args.version_no = version("khoj-assistant")
    if args.version:
        # Show version of khoj installed and exit
-        print(version("khoj-assistant"))
+        print(args.version_no)
        exit(0)

    # Normalize config_file path to absolute path
@@ -45,6 +46,22 @@ def cli(args=None):
    if not args.config_file.exists():
        args.config = None
    else:
+        args = migrate_config(args)
        args.config = parse_config_from_file(args.config_file)

    return args
+
+
+def migrate_config(args):
+    "Stamp the running khoj version into a config file that lacks one and request an index rebuild"
+    raw_config = load_config_from_file(args.config_file)
+    if "version" in raw_config:
+        # Config file is already versioned. Nothing to migrate
+        return args
+
+    # Add version to khoj config schema
+    raw_config["version"] = args.version_no
+    save_config_to_file(raw_config, args.config_file)
+    # Regenerate khoj index on first start of this version to apply index corruption fixes from #325
+    args.regenerate = True
+    return args
--- a/src/khoj/utils/config.py
+++ b/src/khoj/utils/config.py
@@ -3,7 +3,7 @@ from __future__ import annotations  # to avoid quoting type hints
 from enum import Enum
 from dataclasses import dataclass
 from pathlib import Path
-from typing import TYPE_CHECKING, Dict, List, Union
+from typing import TYPE_CHECKING, Dict, List, Optional, Union

 # External Packages
 import torch
@@ -30,42 +30,48 @@ class ProcessorType(str, Enum):
    Conversation = "conversation"


+@dataclass
+class TextContent:  # Indexed content of a single text content type (e.g. org, markdown, pdf)
+    entries: List[Entry]  # Entries extracted from the content type's compressed jsonl file
+    corpus_embeddings: torch.Tensor  # Entry embeddings computed or loaded by text_search.compute_embeddings
+    filters: List[BaseFilter]  # Filters applied to the query, entries and embeddings before semantic search
+
+
+@dataclass
+class ImageContent:  # Indexed image content built by image_search.setup
+    image_names: List[str]  # Image files covered by the index; passed to image_search.collate_results
+    image_embeddings: torch.Tensor  # Searched against the encoded query in image_search.query
+    image_metadata_embeddings: torch.Tensor  # Searched as well when truthy; query skips metadata search otherwise
+
+
+@dataclass
 class TextSearchModel:
-    def __init__(
-        self,
-        entries: List[Entry],
-        corpus_embeddings: torch.Tensor,
-        bi_encoder: BaseEncoder,
-        cross_encoder: CrossEncoder,
-        filters: List[BaseFilter],
-        top_k,
-    ):
-        self.entries = entries
-        self.corpus_embeddings = corpus_embeddings
-        self.bi_encoder = bi_encoder
-        self.cross_encoder = cross_encoder
-        self.filters = filters
-        self.top_k = top_k
+    bi_encoder: BaseEncoder  # Encodes queries and entries into embeddings
+    cross_encoder: Optional[CrossEncoder] = None  # Re-scores hits when ranking is requested; skipped if None
+    top_k: Optional[int] = 15  # Upper bound on hits fetched per query; text_search.query substitutes 10 when unset


+@dataclass
 class ImageSearchModel:
-    def __init__(self, image_names, image_embeddings, image_metadata_embeddings, image_encoder: BaseEncoder):
-        self.image_encoder = image_encoder
-        self.image_names = image_names
-        self.image_embeddings = image_embeddings
-        self.image_metadata_embeddings = image_metadata_embeddings
-        self.image_encoder = image_encoder
+    image_encoder: BaseEncoder
+
+
+@dataclass
+class ContentIndex:  # Indexed content per content type; None marks a content type with no index loaded
+    org: Optional[TextContent] = None
+    markdown: Optional[TextContent] = None
+    pdf: Optional[TextContent] = None
+    github: Optional[TextContent] = None
+    notion: Optional[TextContent] = None
+    image: Optional[ImageContent] = None
+    plugins: Optional[Dict[str, TextContent]] = None  # Looked up by search type value in the search API


 @dataclass
 class SearchModels:
-    org_search: Union[TextSearchModel, None] = None
-    markdown_search: Union[TextSearchModel, None] = None
-    pdf_search: Union[TextSearchModel, None] = None
-    image_search: Union[ImageSearchModel, None] = None
-    github_search: Union[TextSearchModel, None] = None
-    notion_search: Union[TextSearchModel, None] = None
-    plugin_search: Union[Dict[str, TextSearchModel], None] = None
+    text_search: Optional[TextSearchModel] = None
+    image_search: Optional[ImageSearchModel] = None
+    plugin_search: Optional[Dict[str, TextSearchModel]] = None


 class ConversationProcessorConfigModel:
--- a/src/khoj/utils/helpers.py
+++ b/src/khoj/utils/helpers.py
@@ -20,7 +20,7 @@ from khoj.utils import constants

 if TYPE_CHECKING:
    # External Packages
-    from sentence_transformers import CrossEncoder
+    from sentence_transformers import SentenceTransformer, CrossEncoder

    # Internal Packages
    from khoj.utils.models import BaseEncoder
@@ -64,7 +64,9 @@ def merge_dicts(priority_dict: dict, default_dict: dict):
    return merged_dict


-def load_model(model_name: str, model_type, model_dir=None, device: str = None) -> Union[BaseEncoder, CrossEncoder]:
+def load_model(
+    model_name: str, model_type, model_dir=None, device: str = None
+) -> Union[BaseEncoder, SentenceTransformer, CrossEncoder]:
    "Load model from disk or huggingface"
    # Construct model path
    logger = logging.getLogger(__name__)
--- a/src/khoj/utils/jsonl.py
+++ b/src/khoj/utils/jsonl.py
@@ -20,7 +20,7 @@ def load_jsonl(input_path):
    # Open JSONL file
    if input_path.suffix == ".gz":
        jsonl_file = gzip.open(get_absolute_path(input_path), "rt", encoding="utf-8")
-    elif input_path.suffix == ".jsonl":
+    else:
        jsonl_file = open(get_absolute_path(input_path), "r", encoding="utf-8")

    # Read JSONL file
@@ -36,17 +36,6 @@ def load_jsonl(input_path):
    return data


-def dump_jsonl(jsonl_data, output_path):
-    "Write List of JSON objects to JSON line file"
-    # Create output directory, if it doesn't exist
-    output_path.parent.mkdir(parents=True, exist_ok=True)
-
-    with open(output_path, "w", encoding="utf-8") as f:
-        f.write(jsonl_data)
-
-    logger.debug(f"Wrote jsonl data to {output_path}")
-
-
 def compress_jsonl_data(jsonl_data, output_path):
    # Create output directory, if it doesn't exist
    output_path.parent.mkdir(parents=True, exist_ok=True)
--- a/src/khoj/utils/rawconfig.py
+++ b/src/khoj/utils/rawconfig.py
@@ -119,10 +119,11 @@ class AppConfig(ConfigBase):


 class FullConfig(ConfigBase):
-    content_type: Optional[ContentConfig]
-    search_type: Optional[SearchConfig]
-    processor: Optional[ProcessorConfig]
+    content_type: Optional[ContentConfig] = None
+    search_type: Optional[SearchConfig] = None
+    processor: Optional[ProcessorConfig] = None
    app: Optional[AppConfig] = AppConfig(should_log_telemetry=True)
+    version: Optional[str] = None


 class SearchResponse(ConfigBase):
--- a/src/khoj/utils/state.py
+++ b/src/khoj/utils/state.py
@@ -9,13 +9,14 @@ from pathlib import Path

 # Internal Packages
 from khoj.utils import config as utils_config
-from khoj.utils.config import SearchModels, ProcessorConfigModel
+from khoj.utils.config import ContentIndex, SearchModels, ProcessorConfigModel
 from khoj.utils.helpers import LRU
 from khoj.utils.rawconfig import FullConfig

 # Application Global State
 config = FullConfig()
-model = SearchModels()
+search_models = SearchModels()
+content_index = ContentIndex()
 processor_config = ProcessorConfigModel()
 config_file: Path = None
 verbose: int = 0
@@ -23,7 +24,7 @@ host: str = None
 port: int = None
 cli_args: List[str] = None
 query_cache = LRU()
-search_index_lock = threading.Lock()
+config_lock = threading.Lock()
 SearchType = utils_config.SearchType
 telemetry: List[Dict[str, str]] = []
 previous_query: str = None
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -10,6 +10,7 @@ from khoj.main import app
 from khoj.configure import configure_processor, configure_routes, configure_search_types
 from khoj.processor.markdown.markdown_to_jsonl import MarkdownToJsonl
 from khoj.search_type import image_search, text_search
+from khoj.utils.config import ImageContent, SearchModels, TextContent
 from khoj.utils.helpers import resolve_absolute_path
 from khoj.utils.rawconfig import (
    ContentConfig,
@@ -41,50 +42,66 @@ def search_config() -> SearchConfig:
        encoder="sentence-transformers/all-MiniLM-L6-v2",
        cross_encoder="cross-encoder/ms-marco-MiniLM-L-6-v2",
        model_directory=model_dir / "symmetric/",
+        encoder_type=None,
    )

    search_config.asymmetric = TextSearchConfig(
        encoder="sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
        cross_encoder="cross-encoder/ms-marco-MiniLM-L-6-v2",
        model_directory=model_dir / "asymmetric/",
+        encoder_type=None,
    )

    search_config.image = ImageSearchConfig(
-        encoder="sentence-transformers/clip-ViT-B-32", model_directory=model_dir / "image/"
+        encoder="sentence-transformers/clip-ViT-B-32",
+        model_directory=model_dir / "image/",
+        encoder_type=None,
    )

    return search_config


@pytest.fixture(scope="session")
-def content_config(tmp_path_factory, search_config: SearchConfig):
+def search_models(search_config: SearchConfig):
+    search_models = SearchModels()
+    search_models.text_search = text_search.initialize_model(search_config.asymmetric)
+    search_models.image_search = image_search.initialize_model(search_config.image)
+
+    return search_models
+
+
+@pytest.fixture(scope="session")
+def content_config(tmp_path_factory, search_models: SearchModels, search_config: SearchConfig):
    content_dir = tmp_path_factory.mktemp("content")

    # Generate Image Embeddings from Test Images
    content_config = ContentConfig()
    content_config.image = ImageContentConfig(
+        input_filter=None,
        input_directories=["tests/data/images"],
        embeddings_file=content_dir.joinpath("image_embeddings.pt"),
        batch_size=1,
        use_xmp_metadata=False,
    )

-    image_search.setup(content_config.image, search_config.image, regenerate=False)
+    image_search.setup(content_config.image, search_models.image_search.image_encoder, regenerate=False)

    # Generate Notes Embeddings from Test Notes
    content_config.org = TextContentConfig(
        input_files=None,
        input_filter=["tests/data/org/*.org"],
-        compressed_jsonl=content_dir.joinpath("notes.jsonl"),
+        compressed_jsonl=content_dir.joinpath("notes.jsonl.gz"),
        embeddings_file=content_dir.joinpath("note_embeddings.pt"),
    )

    filters = [DateFilter(), WordFilter(), FileFilter()]
-    text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters)
+    text_search.setup(
+        OrgToJsonl, content_config.org, search_models.text_search.bi_encoder, regenerate=False, filters=filters
+    )

    content_config.plugins = {
        "plugin1": TextContentConfig(
-            input_files=[content_dir.joinpath("notes.jsonl")],
+            input_files=[content_dir.joinpath("notes.jsonl.gz")],
            input_filter=None,
            compressed_jsonl=content_dir.joinpath("plugin.jsonl.gz"),
            embeddings_file=content_dir.joinpath("plugin_embeddings.pt"),
@@ -106,7 +123,11 @@ def content_config(tmp_path_factory, search_config: SearchConfig):

    filters = [DateFilter(), WordFilter(), FileFilter()]
    text_search.setup(
-        JsonlToJsonl, content_config.plugins["plugin1"], search_config.asymmetric, regenerate=False, filters=filters
+        JsonlToJsonl,
+        content_config.plugins["plugin1"],
+        search_models.text_search.bi_encoder,
+        regenerate=False,
+        filters=filters,
    )

    return content_config
@@ -121,7 +142,7 @@ def md_content_config(tmp_path_factory):
    content_config.markdown = TextContentConfig(
        input_files=None,
        input_filter=["tests/data/markdown/*.markdown"],
-        compressed_jsonl=content_dir.joinpath("markdown.jsonl"),
+        compressed_jsonl=content_dir.joinpath("markdown.jsonl.gz"),
        embeddings_file=content_dir.joinpath("markdown_embeddings.pt"),
    )

@@ -157,8 +178,13 @@ def chat_client(md_content_config: ContentConfig, search_config: SearchConfig, p

    # Index Markdown Content for Search
    filters = [DateFilter(), WordFilter(), FileFilter()]
-    state.model.markdown_search = text_search.setup(
-        MarkdownToJsonl, md_content_config.markdown, search_config.asymmetric, regenerate=False, filters=filters
+    state.search_models.text_search = text_search.initialize_model(search_config.asymmetric)
+    state.content_index.markdown = text_search.setup(
+        MarkdownToJsonl,
+        md_content_config.markdown,
+        state.search_models.text_search.bi_encoder,
+        regenerate=False,
+        filters=filters,
    )

    # Initialize Processor from Config
@@ -175,8 +201,14 @@ def client(content_config: ContentConfig, search_config: SearchConfig, processor
    state.SearchType = configure_search_types(state.config)

    # These lines help us Mock the Search models for these search types
-    state.model.org_search = {}
-    state.model.image_search = {}
+    state.search_models.text_search = text_search.initialize_model(search_config.asymmetric)
+    state.search_models.image_search = image_search.initialize_model(search_config.image)
+    state.content_index.org = text_search.setup(
+        OrgToJsonl, content_config.org, state.search_models.text_search.bi_encoder, regenerate=False
+    )
+    state.content_index.image = image_search.setup(
+        content_config.image, state.search_models.image_search.image_encoder, regenerate=False
+    )

    configure_routes(app)
    return TestClient(app)
--- a/tests/data/org/main_readme.org
+++ b/tests/data/org/main_readme.org
@@ -27,9 +27,9 @@
     - Run ~M-x khoj <user-query>~ or Call ~C-c C-s~

   - *Khoj via API*
-     - Query: ~GET~ [[http://localhost:8000/api/search?q=%22what%20is%20the%20meaning%20of%20life%22][http://localhost:8000/api/search?q="What is the meaning of life"]]
-     - Update Index: ~GET~ [[http://localhost:8000/api/update][http://localhost:8000/api/update]]
-     - [[http://localhost:8000/docs][Khoj API Docs]]
+     - Query: ~GET~ [[http://localhost:42110/api/search?q=%22what%20is%20the%20meaning%20of%20life%22][http://localhost:42110/api/search?q="What is the meaning of life"]]
+     - Update Index: ~GET~ [[http://localhost:42110/api/update][http://localhost:42110/api/update]]
+     - [[http://localhost:42110/docs][Khoj API Docs]]

   - *Call Khoj via Python Script Directly*
     #+begin_src shell
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -11,7 +11,8 @@ from fastapi.testclient import TestClient
 from khoj.main import app
 from khoj.configure import configure_routes, configure_search_types
 from khoj.utils import state
-from khoj.utils.state import model, config
+from khoj.utils.config import SearchModels
+from khoj.utils.state import search_models, content_index, config
 from khoj.search_type import text_search, image_search
 from khoj.utils.rawconfig import ContentConfig, SearchConfig
 from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl
@@ -34,11 +35,11 @@ def test_search_with_invalid_content_type(client):

 # ----------------------------------------------------------------------------------------------------
 def test_search_with_valid_content_type(client):
-    for content_type in ["all", "org", "markdown", "image", "pdf", "plugin1"]:
+    for content_type in ["all", "org", "markdown", "image", "pdf", "github", "notion", "plugin1"]:
        # Act
        response = client.get(f"/api/search?q=random&t={content_type}")
        # Assert
-        assert response.status_code == 200
+        assert response.status_code == 200, f"Returned status: {response.status_code} for content type: {content_type}"


 # ----------------------------------------------------------------------------------------------------
@@ -52,11 +53,11 @@ def test_update_with_invalid_content_type(client):

 # ----------------------------------------------------------------------------------------------------
 def test_update_with_valid_content_type(client):
-    for content_type in ["org", "markdown", "image", "pdf", "plugin1"]:
+    for content_type in ["all", "org", "markdown", "image", "pdf", "github", "notion", "plugin1"]:
        # Act
        response = client.get(f"/api/update?t={content_type}")
        # Assert
-        assert response.status_code == 200
+        assert response.status_code == 200, f"Returned status: {response.status_code} for content type: {content_type}"


 # ----------------------------------------------------------------------------------------------------
@@ -70,11 +71,11 @@ def test_regenerate_with_invalid_content_type(client):

 # ----------------------------------------------------------------------------------------------------
 def test_regenerate_with_valid_content_type(client):
-    for content_type in ["org", "markdown", "image", "pdf", "plugin1"]:
+    for content_type in ["all", "org", "markdown", "image", "pdf", "github", "notion", "plugin1"]:
        # Act
        response = client.get(f"/api/update?force=true&t={content_type}")
        # Assert
-        assert response.status_code == 200
+        assert response.status_code == 200, f"Returned status: {response.status_code} for content type: {content_type}"


 # ----------------------------------------------------------------------------------------------------
@@ -143,7 +144,10 @@ def test_get_configured_types_with_no_content_config():
 # ----------------------------------------------------------------------------------------------------
 def test_image_search(client, content_config: ContentConfig, search_config: SearchConfig):
    # Arrange
-    model.image_search = image_search.setup(content_config.image, search_config.image, regenerate=False)
+    search_models.image_search = image_search.initialize_model(search_config.image)
+    content_index.image = image_search.setup(
+        content_config.image, search_models.image_search.image_encoder, regenerate=False
+    )
    query_expected_image_pairs = [
        ("kitten", "kitten_park.jpg"),
        ("a horse and dog on a leash", "horse_dog.jpg"),
@@ -166,7 +170,10 @@ def test_image_search(client, content_config: ContentConfig, search_config: Sear
 # ----------------------------------------------------------------------------------------------------
 def test_notes_search(client, content_config: ContentConfig, search_config: SearchConfig):
    # Arrange
-    model.org_search = text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False)
+    search_models.text_search = text_search.initialize_model(search_config.asymmetric)
+    content_index.org = text_search.setup(
+        OrgToJsonl, content_config.org, search_models.text_search.bi_encoder, regenerate=False
+    )
    user_query = quote("How to git install application?")

    # Act
@@ -183,8 +190,9 @@ def test_notes_search(client, content_config: ContentConfig, search_config: Sear
 def test_notes_search_with_only_filters(client, content_config: ContentConfig, search_config: SearchConfig):
    # Arrange
    filters = [WordFilter(), FileFilter()]
-    model.org_search = text_search.setup(
-        OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters
+    search_models.text_search = text_search.initialize_model(search_config.asymmetric)
+    content_index.org = text_search.setup(
+        OrgToJsonl, content_config.org, search_models.text_search.bi_encoder, regenerate=False, filters=filters
    )
    user_query = quote('+"Emacs" file:"*.org"')

@@ -202,8 +210,9 @@ def test_notes_search_with_only_filters(client, content_config: ContentConfig, s
 def test_notes_search_with_include_filter(client, content_config: ContentConfig, search_config: SearchConfig):
    # Arrange
    filters = [WordFilter()]
-    model.org_search = text_search.setup(
-        OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters
+    search_models.text_search = text_search.initialize_model(search_config.asymmetric)
+    content_index.org = text_search.setup(
+        OrgToJsonl, content_config.org, search_models.text_search.bi_encoder, regenerate=False, filters=filters
    )
    user_query = quote('How to git install application? +"Emacs"')

@@ -221,8 +230,9 @@ def test_notes_search_with_include_filter(client, content_config: ContentConfig,
 def test_notes_search_with_exclude_filter(client, content_config: ContentConfig, search_config: SearchConfig):
    # Arrange
    filters = [WordFilter()]
-    model.org_search = text_search.setup(
-        OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters
+    search_models.text_search = text_search.initialize_model(search_config.asymmetric)
+    content_index.org = text_search.setup(
+        OrgToJsonl, content_config.org, search_models.text_search.bi_encoder, regenerate=False, filters=filters
    )
    user_query = quote('How to git install application? -"clone"')

--- a/tests/test_image_search.py
+++ b/tests/test_image_search.py
@@ -5,9 +5,10 @@ from PIL import Image

 # External Packages
 import pytest
+from khoj.utils.config import SearchModels

 # Internal Packages
-from khoj.utils.state import model
+from khoj.utils.state import content_index, search_models
 from khoj.utils.constants import web_directory
 from khoj.search_type import image_search
 from khoj.utils.helpers import resolve_absolute_path
@@ -16,10 +17,12 @@ from khoj.utils.rawconfig import ContentConfig, SearchConfig

 # Test
 # ----------------------------------------------------------------------------------------------------
-def test_image_search_setup(content_config: ContentConfig, search_config: SearchConfig):
+def test_image_search_setup(content_config: ContentConfig, search_models: SearchModels):
    # Act
    # Regenerate image search embeddings during image setup
-    image_search_model = image_search.setup(content_config.image, search_config.image, regenerate=True)
+    image_search_model = image_search.setup(
+        content_config.image, search_models.image_search.image_encoder, regenerate=True
+    )

    # Assert
    assert len(image_search_model.image_names) == 3
@@ -54,8 +57,11 @@ def test_image_metadata(content_config: ContentConfig):
@pytest.mark.anyio
 async def test_image_search(content_config: ContentConfig, search_config: SearchConfig):
    # Arrange
+    search_models.image_search = image_search.initialize_model(search_config.image)
+    content_index.image = image_search.setup(
+        content_config.image, search_models.image_search.image_encoder, regenerate=False
+    )
    output_directory = resolve_absolute_path(web_directory)
-    model.image_search = image_search.setup(content_config.image, search_config.image, regenerate=False)
    query_expected_image_pairs = [
        ("kitten", "kitten_park.jpg"),
        ("horse and dog in a farm", "horse_dog.jpg"),
@@ -64,11 +70,13 @@ async def test_image_search(content_config: ContentConfig, search_config: Search

    # Act
    for query, expected_image_name in query_expected_image_pairs:
-        hits = await image_search.query(query, count=1, model=model.image_search)
+        hits = await image_search.query(
+            query, count=1, search_model=search_models.image_search, content=content_index.image
+        )

        results = image_search.collate_results(
            hits,
-            model.image_search.image_names,
+            content_index.image.image_names,
            output_directory=output_directory,
            image_files_url="/static/images",
            count=1,
@@ -90,7 +98,10 @@ async def test_image_search(content_config: ContentConfig, search_config: Search
@pytest.mark.anyio
 async def test_image_search_query_truncated(content_config: ContentConfig, search_config: SearchConfig, caplog):
    # Arrange
-    model.image_search = image_search.setup(content_config.image, search_config.image, regenerate=False)
+    search_models.image_search = image_search.initialize_model(search_config.image)
+    content_index.image = image_search.setup(
+        content_config.image, search_models.image_search.image_encoder, regenerate=False
+    )
    max_words_supported = 10
    query = " ".join(["hello"] * 100)
    truncated_query = " ".join(["hello"] * max_words_supported)
@@ -98,7 +109,9 @@ async def test_image_search_query_truncated(content_config: ContentConfig, searc
    # Act
    try:
        with caplog.at_level(logging.INFO, logger="khoj.search_type.image_search"):
-            await image_search.query(query, count=1, model=model.image_search)
+            await image_search.query(
+                query, count=1, search_model=search_models.image_search, content=content_index.image
+            )
    # Assert
    except RuntimeError as e:
        if "The size of tensor a (102) must match the size of tensor b (77)" in str(e):
@@ -110,8 +123,11 @@ async def test_image_search_query_truncated(content_config: ContentConfig, searc
@pytest.mark.anyio
 async def test_image_search_by_filepath(content_config: ContentConfig, search_config: SearchConfig, caplog):
    # Arrange
+    search_models.image_search = image_search.initialize_model(search_config.image)
+    content_index.image = image_search.setup(
+        content_config.image, search_models.image_search.image_encoder, regenerate=False
+    )
    output_directory = resolve_absolute_path(web_directory)
-    model.image_search = image_search.setup(content_config.image, search_config.image, regenerate=False)
    image_directory = content_config.image.input_directories[0]

    query = f"file:{image_directory.joinpath('kitten_park.jpg')}"
@@ -119,11 +135,13 @@ async def test_image_search_by_filepath(content_config: ContentConfig, search_co

    # Act
    with caplog.at_level(logging.INFO, logger="khoj.search_type.image_search"):
-        hits = await image_search.query(query, count=1, model=model.image_search)
+        hits = await image_search.query(
+            query, count=1, search_model=search_models.image_search, content=content_index.image
+        )

        results = image_search.collate_results(
            hits,
-            model.image_search.image_names,
+            content_index.image.image_names,
            output_directory=output_directory,
            image_files_url="/static/images",
            count=1,
--- a/tests/test_text_search.py
+++ b/tests/test_text_search.py
@@ -5,9 +5,11 @@ import os

 # External Packages
 import pytest
+import torch
+from khoj.utils.config import SearchModels

 # Internal Packages
-from khoj.utils.state import model
+from khoj.utils.state import content_index, search_models
 from khoj.search_type import text_search
 from khoj.utils.rawconfig import ContentConfig, SearchConfig, TextContentConfig
 from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl
@@ -16,7 +18,7 @@ from khoj.processor.github.github_to_jsonl import GithubToJsonl

 # Test
 # ----------------------------------------------------------------------------------------------------
-def test_asymmetric_setup_with_missing_file_raises_error(
+def test_text_search_setup_with_missing_file_raises_error(
    org_config_with_only_new_file: TextContentConfig, search_config: SearchConfig
 ):
    # Arrange
@@ -31,7 +33,7 @@ def test_asymmetric_setup_with_missing_file_raises_error(


 # ----------------------------------------------------------------------------------------------------
-def test_asymmetric_setup_with_empty_file_raises_error(
+def test_text_search_setup_with_empty_file_raises_error(
    org_config_with_only_new_file: TextContentConfig, search_config: SearchConfig
 ):
    # Act
@@ -41,10 +43,12 @@ def test_asymmetric_setup_with_empty_file_raises_error(


 # ----------------------------------------------------------------------------------------------------
-def test_asymmetric_setup(content_config: ContentConfig, search_config: SearchConfig):
+def test_text_search_setup(content_config: ContentConfig, search_models: SearchModels):
    # Act
    # Regenerate notes embeddings during asymmetric setup
-    notes_model = text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=True)
+    notes_model = text_search.setup(
+        OrgToJsonl, content_config.org, search_models.text_search.bi_encoder, regenerate=True
+    )

    # Assert
    assert len(notes_model.entries) == 10
@@ -52,34 +56,39 @@ def test_asymmetric_setup(content_config: ContentConfig, search_config: SearchCo


 # ----------------------------------------------------------------------------------------------------
-def test_text_content_index_only_updates_on_changes(content_config: ContentConfig, search_config: SearchConfig, caplog):
+def test_text_index_same_if_content_unchanged(content_config: ContentConfig, search_models: SearchModels, caplog):
    # Arrange
    caplog.set_level(logging.INFO, logger="khoj")

    # Act
    # Generate initial notes embeddings during asymmetric setup
-    text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=True)
+    text_search.setup(OrgToJsonl, content_config.org, search_models.text_search.bi_encoder, regenerate=True)
    initial_logs = caplog.text
    caplog.clear()  # Clear logs

    # Run asymmetric setup again with no changes to data source. Ensure index is not updated
-    text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False)
+    text_search.setup(OrgToJsonl, content_config.org, search_models.text_search.bi_encoder, regenerate=False)
    final_logs = caplog.text

    # Assert
-    assert "📩 Saved computed text embeddings to" in initial_logs
-    assert "📩 Saved computed text embeddings to" not in final_logs
+    assert "Creating index from scratch." in initial_logs
+    assert "Creating index from scratch." not in final_logs


 # ----------------------------------------------------------------------------------------------------
@pytest.mark.anyio
 async def test_asymmetric_search(content_config: ContentConfig, search_config: SearchConfig):
    # Arrange
-    model.notes_search = text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=True)
+    search_models.text_search = text_search.initialize_model(search_config.asymmetric)
+    content_index.org = text_search.setup(
+        OrgToJsonl, content_config.org, search_models.text_search.bi_encoder, regenerate=True
+    )
    query = "How to git install application?"

    # Act
-    hits, entries = await text_search.query(query, model=model.notes_search, rank_results=True)
+    hits, entries = await text_search.query(
+        query, search_model=search_models.text_search, content=content_index.org, rank_results=True
+    )

    results = text_search.collate_results(hits, entries, count=1)

@@ -90,7 +99,7 @@ async def test_asymmetric_search(content_config: ContentConfig, search_config: S


 # ----------------------------------------------------------------------------------------------------
-def test_entry_chunking_by_max_tokens(org_config_with_only_new_file: TextContentConfig, search_config: SearchConfig):
+def test_entry_chunking_by_max_tokens(org_config_with_only_new_file: TextContentConfig, search_models: SearchModels):
    # Arrange
    # Insert org-mode entry with size exceeding max token limit to new org file
    max_tokens = 256
@@ -103,7 +112,7 @@ def test_entry_chunking_by_max_tokens(org_config_with_only_new_file: TextContent
    # Act
    # reload embeddings, entries, notes model after adding new org-mode file
    initial_notes_model = text_search.setup(
-        OrgToJsonl, org_config_with_only_new_file, search_config.asymmetric, regenerate=False
+        OrgToJsonl, org_config_with_only_new_file, search_models.text_search.bi_encoder, regenerate=False
    )

    # Assert
@@ -113,9 +122,13 @@ def test_entry_chunking_by_max_tokens(org_config_with_only_new_file: TextContent


 # ----------------------------------------------------------------------------------------------------
-def test_asymmetric_reload(content_config: ContentConfig, search_config: SearchConfig, new_org_file: Path):
+def test_regenerate_index_with_new_entry(
+    content_config: ContentConfig, search_models: SearchModels, new_org_file: Path
+):
    # Arrange
-    initial_notes_model = text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=True)
+    initial_notes_model = text_search.setup(
+        OrgToJsonl, content_config.org, search_models.text_search.bi_encoder, regenerate=True
+    )

    assert len(initial_notes_model.entries) == 10
    assert len(initial_notes_model.corpus_embeddings) == 10
@@ -125,23 +138,20 @@ def test_asymmetric_reload(content_config: ContentConfig, search_config: SearchC
    with open(new_org_file, "w") as f:
        f.write("\n* A Chihuahua doing Tango\n- Saw a super cute video of a chihuahua doing the Tango on Youtube\n")

+    # Act
    # regenerate notes jsonl, model embeddings and model to include entry from new file
    regenerated_notes_model = text_search.setup(
-        OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=True
+        OrgToJsonl, content_config.org, search_models.text_search.bi_encoder, regenerate=True
    )

-    # Act
-    # reload embeddings, entries, notes model from previously generated notes jsonl and model embeddings files
-    initial_notes_model = text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False)
-
    # Assert
    assert len(regenerated_notes_model.entries) == 11
    assert len(regenerated_notes_model.corpus_embeddings) == 11

-    # Assert
-    # verify new entry loaded from updated embeddings, entries
-    assert len(initial_notes_model.entries) == 11
-    assert len(initial_notes_model.corpus_embeddings) == 11
+    # verify new entry appended to index, without disrupting order or content of existing entries
+    error_details = compare_index(initial_notes_model, regenerated_notes_model)
+    if error_details:
+        pytest.fail(error_details, False)

    # Cleanup
    # reset input_files in config to empty list
@@ -149,26 +159,101 @@ def test_asymmetric_reload(content_config: ContentConfig, search_config: SearchC


 # ----------------------------------------------------------------------------------------------------
-def test_incremental_update(content_config: ContentConfig, search_config: SearchConfig, new_org_file: Path):
+def test_update_index_with_duplicate_entries_in_stable_order(
+    org_config_with_only_new_file: TextContentConfig, search_models: SearchModels
+):
    # Arrange
-    initial_notes_model = text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=True)
+    new_file_to_index = Path(org_config_with_only_new_file.input_files[0])

-    assert len(initial_notes_model.entries) == 10
-    assert len(initial_notes_model.corpus_embeddings) == 10
+    # Insert org-mode entries with same compiled form into new org file
+    new_entry = "* TODO A Chihuahua doing Tango\n- Saw a super cute video of a chihuahua doing the Tango on Youtube\n"
+    with open(new_file_to_index, "w") as f:
+        f.write(f"{new_entry}{new_entry}")
+
+    # Act
+    # load embeddings, entries, notes model after adding new org-mode file
+    initial_index = text_search.setup(
+        OrgToJsonl, org_config_with_only_new_file, search_models.text_search.bi_encoder, regenerate=True
+    )
+
+    # update embeddings, entries, notes model after adding new org-mode file
+    updated_index = text_search.setup(
+        OrgToJsonl, org_config_with_only_new_file, search_models.text_search.bi_encoder, regenerate=False
+    )
+
+    # Assert
+    # verify only 1 entry added even if there are multiple duplicate entries
+    assert len(initial_index.entries) == len(updated_index.entries) == 1
+    assert len(initial_index.corpus_embeddings) == len(updated_index.corpus_embeddings) == 1
+
+    # verify the same entry is added even when there are multiple duplicate entries
+    error_details = compare_index(initial_index, updated_index)
+    if error_details:
+        pytest.fail(error_details)
+
+
+# ----------------------------------------------------------------------------------------------------
+def test_update_index_with_deleted_entry(org_config_with_only_new_file: TextContentConfig, search_models: SearchModels):
+    # Arrange
+    new_file_to_index = Path(org_config_with_only_new_file.input_files[0])
+
+    # Insert 2 distinct org-mode entries into new org file (second entry has an extra trailing line)
+    new_entry = "* TODO A Chihuahua doing Tango\n- Saw a super cute video of a chihuahua doing the Tango on Youtube\n"
+    with open(new_file_to_index, "w") as f:
+        f.write(f"{new_entry}{new_entry} -- Tatooine")
+
+    # load embeddings, entries, notes model after adding new org file with 2 entries
+    initial_index = text_search.setup(
+        OrgToJsonl, org_config_with_only_new_file, search_models.text_search.bi_encoder, regenerate=True
+    )
+
+    # update embeddings, entries, notes model after removing an entry from the org file
+    with open(new_file_to_index, "w") as f:
+        f.write(f"{new_entry}")
+
+    # Act
+    updated_index = text_search.setup(
+        OrgToJsonl, org_config_with_only_new_file, search_models.text_search.bi_encoder, regenerate=False
+    )
+
+    # Assert
+    # verify the updated index has 1 entry less after an entry is deleted from the org file
+    assert len(initial_index.entries) == len(updated_index.entries) + 1
+    assert len(initial_index.corpus_embeddings) == len(updated_index.corpus_embeddings) + 1
+
+    # verify entries, embeddings kept in the updated index match those at the same positions in the initial index
+    error_details = compare_index(updated_index, initial_index)
+    if error_details:
+        pytest.fail(error_details)
+
+
+# ----------------------------------------------------------------------------------------------------
+def test_update_index_with_new_entry(content_config: ContentConfig, search_models: SearchModels, new_org_file: Path):
+    # Arrange
+    initial_notes_model = text_search.setup(
+        OrgToJsonl, content_config.org, search_models.text_search.bi_encoder, regenerate=True, normalize=False
+    )

    # append org-mode entry to first org input file in config
    with open(new_org_file, "w") as f:
-        f.write("\n* A Chihuahua doing Tango\n- Saw a super cute video of a chihuahua doing the Tango on Youtube\n")
+        new_entry = "\n* A Chihuahua doing Tango\n- Saw a super cute video of a chihuahua doing the Tango on Youtube\n"
+        f.write(new_entry)

    # Act
    # update embeddings, entries with the newly added note
    content_config.org.input_files = [f"{new_org_file}"]
-    initial_notes_model = text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False)
+    final_notes_model = text_search.setup(
+        OrgToJsonl, content_config.org, search_models.text_search.bi_encoder, regenerate=False, normalize=False
+    )

    # Assert
-    # verify new entry added in updated embeddings, entries
-    assert len(initial_notes_model.entries) == 11
-    assert len(initial_notes_model.corpus_embeddings) == 11
+    assert len(final_notes_model.entries) == len(initial_notes_model.entries) + 1
+    assert len(final_notes_model.corpus_embeddings) == len(initial_notes_model.corpus_embeddings) + 1
+
+    # verify new entry appended to index, without disrupting order or content of existing entries
+    error_details = compare_index(initial_notes_model, final_notes_model)
+    if error_details:
+        pytest.fail(error_details, False)

    # Cleanup
    # reset input_files in config to empty list
@@ -177,10 +262,34 @@ def test_incremental_update(content_config: ContentConfig, search_config: Search

 # ----------------------------------------------------------------------------------------------------
@pytest.mark.skipif(os.getenv("GITHUB_PAT_TOKEN") is None, reason="GITHUB_PAT_TOKEN not set")
-def test_asymmetric_setup_github(content_config: ContentConfig, search_config: SearchConfig):
+def test_asymmetric_setup_github(content_config: ContentConfig, search_models: SearchModels):
    # Act
    # Regenerate github embeddings to test asymmetric setup without caching
-    github_model = text_search.setup(GithubToJsonl, content_config.github, search_config.asymmetric, regenerate=True)
+    github_model = text_search.setup(
+        GithubToJsonl, content_config.github, search_models.text_search.bi_encoder, regenerate=True
+    )

    # Assert
    assert len(github_model.entries) > 1
+
+
+# Test helper: compare two indexed models position by position and describe any differences.
+#
+# For every position in initial_notes_model.entries, the to_json() form of the entry is compared
+# with the entry at the same position in final_notes_model. For every position in
+# initial_notes_model.corpus_embeddings, the embeddings at that position are compared with torch.equal.
+# Only the first len(initial_notes_model...) positions are inspected; any additional trailing
+# entries or embeddings in final_notes_model are not examined.
+#
+# Returns an empty string when nothing differs. Otherwise returns one newline-terminated line
+# listing the mismatched entry indices and/or one listing the mismatched embedding indices.
+#
+# NOTE(review): final_notes_model is indexed with indices taken from initial_notes_model, so a
+# final model with fewer items presumably raises IndexError instead of producing a message - confirm.
+# NOTE(review): visible callers pass the models in both orders (updated, initial) and (initial, final),
+# so the parameter names do not always match what is passed - confirm this is intended.
+def compare_index(initial_notes_model, final_notes_model):
+    # indices at which the two models disagree
+    mismatched_entries, mismatched_embeddings = [], []
+    for index in range(len(initial_notes_model.entries)):
+        if initial_notes_model.entries[index].to_json() != final_notes_model.entries[index].to_json():
+            mismatched_entries.append(index)
+
+    # verify new entry embedding appended to embeddings tensor, without disrupting order or content of existing embeddings
+    for index in range(len(initial_notes_model.corpus_embeddings)):
+        if not torch.equal(final_notes_model.corpus_embeddings[index], initial_notes_model.corpus_embeddings[index]):
+            mismatched_embeddings.append(index)
+
+    # build a human readable report; stays empty when both lists are empty
+    error_details = ""
+    if mismatched_entries:
+        mismatched_entries_str = ",".join(map(str, mismatched_entries))
+        error_details += f"Entries at {mismatched_entries_str} not equal\n"
+    if mismatched_embeddings:
+        mismatched_embeddings_str = ", ".join(map(str, mismatched_embeddings))
+        error_details += f"Embeddings at {mismatched_embeddings_str} not equal\n"
+
+    return error_details
--- a/versions.json
+++ b/versions.json
@@ -12,5 +12,6 @@
 	"0.7.1": "0.15.0",
 	"0.8.0": "0.15.0",
 	"0.8.1": "0.15.0",
-	"0.8.2": "0.15.0"
+	"0.8.2": "0.15.0",
+	"0.9.0": "0.15.0"
 }
Author	SHA1	Message	Date
Debanjum Singh Solanky	3e59be7f1d	Release Khoj version 0.9.0	2023-07-18 19:59:27 -07:00
Debanjum Singh Solanky	d078e7b1f6	Clean up search type usage in khoj server, tests and Readme	2023-07-18 19:57:55 -07:00
Debanjum Singh Solanky	4d910936b7	Fix triggering index update on khoj server from khoj.el	2023-07-18 19:57:54 -07:00
Debanjum Singh Solanky	5c7d7f558d	Make AI model used for Khoj chat configurable from khoj.el - Fix bug. Set the unused model-name to a standard default value	2023-07-18 19:57:54 -07:00
Debanjum	5f2be2a9bb	Merge pull request #298 from HyunggyuJang/patch-1 Encode config as utf-8 during setup in khoj.el. This will allow utf-8 encoded files etc to be passed in config	2023-07-18 17:54:11 -07:00
Debanjum	3a1c5a6dab	Merge pull request #329 from khoj-ai/create-schema-migration-func-and-reindex-to-fix-corruption Create Schema Migrator and Reindex to Apply Index Corruption Fixes - `83e1088` Manage `khoj.yml' config migrations on app start. Version the `khoj.yml' schema - `429e1b4` Regenerate index to apply corruption fixes on first run of this khoj version Otherwise users would need to manually re-index their contents with khoj	2023-07-18 16:43:17 -07:00
Debanjum Singh Solanky	429e1b4b48	Regenerate index to apply corruption fixes on first run of new khoj	2023-07-18 16:10:47 -07:00
Debanjum Singh Solanky	83e1088d42	Manage khoj.yml config migrations on app start. Version the schema - Add version to khoj.yml schema Versioning the khoj.yml config schema will simplify future migrations	2023-07-18 16:10:10 -07:00
Debanjum Singh Solanky	71e8ddd9a2	Check if PDF is configured before showing it as an option in khoj.el	2023-07-17 15:49:20 -07:00
Debanjum	d00c5da8b7	Merge pull request #325 from khoj-ai/stablize-simplify-content-indexing ## Stabilize and Simplify Content Indexing ### Major Updates - `9bcca43` Unify logic to update entries when indexing from scratch or incrementally - `89c7819` Unify logic to update embeddings when indexing from scratch or incrementally - `6a0297c` Stable sort new entries when marking entries for update - `58d86d7` Unify logic to configure server from API or on server start - Create tests to ensure old entries, embeddings in index are unaffected on adding new entries - Refer: `1482fd4`, `7669b85`, `88d1a29` - `ad41ef3` Make normalization of embeddings configurable to test this in `c73feeb` ### Minor Updates - `1673bb5` Add todo state to compiled form of each entry - `6e70b91` Remove unused `dump_jsonl` helper method - `7ad9603` Improve naming of lock - `b02323a` Improve naming text search test methods Resolves #190	2023-07-17 14:51:10 -07:00
Debanjum Singh Solanky	3e3a1ecbc8	Start app even if server init fails to let user fix it Show stacktrace on error to help debugging	2023-07-17 14:33:02 -07:00
Debanjum Singh Solanky	ef6a0044f4	Drop embeddings of deleted text entries from index Previously the deleted embeddings would continue to be in the index, even after the entry was deleted	2023-07-16 03:47:05 -07:00
Debanjum Singh Solanky	c73feebf25	Test index embeddings are stable on incremental update & no norm Ensure order of new embedding insertion on incremental update does not affect the order and value of existing embeddings when normalization is turned off	2023-07-16 02:22:28 -07:00
Debanjum Singh Solanky	ad41ef3991	Make normalizing embeddings configurable	2023-07-16 02:16:33 -07:00
Debanjum Singh Solanky	1482fd4d4d	Test index is stable sorted on incremental update with new entry Ensure order of new embedding, entry insertion on incremental update is stable	2023-07-16 01:45:53 -07:00
Debanjum Singh Solanky	b02323ade6	Improve name of text search test functions Asymmetric was older name used to differentiate between symmetric, asymmetric search. Now that text search just uses asymmetric search stick to simpler name	2023-07-16 01:45:53 -07:00
Debanjum Singh Solanky	89c7819cb7	Unify logic to generate embeddings from scratch and incrementally This simplifies the `compute_embeddings' method and avoids potential later divergence in handling the index regenerate vs update scenarios	2023-07-16 01:45:53 -07:00
Debanjum Singh Solanky	6a0297cc86	Stable sort new entries when marking entries for update	2023-07-16 01:45:53 -07:00
Debanjum Singh Solanky	7669b85da6	Test index is stable sorted on regenerate with new entry	2023-07-16 01:45:53 -07:00
Debanjum Singh Solanky	6e70b914c2	Remove unused dump_jsonl method The entries index is stored in gzipped jsonl files for each content type	2023-07-16 01:45:53 -07:00
Debanjum Singh Solanky	9bcca43299	Use single func to handle indexing from scratch and incrementally Previous regenerate mechanism did not deduplicate entries with same key So entries looked different between regenerate and update Having single func, mark_entries_for_update, to handle both scenarios will avoid this divergence Update all text_to_jsonl methods to use the above method for generating index from scratch	2023-07-16 01:45:53 -07:00
Debanjum Singh Solanky	1673bb5558	Add todo state to compiled form of each org-mode entry	2023-07-16 01:45:53 -07:00
Debanjum Singh Solanky	88d1a29a84	Test index is stable for duplicate entries across regenerate, update - Current incorrect behavior: All entries with duplicate compiled form are kept on regenerate but on update only the last of the duplicated entries is kept This divergent behavior is not ideal to prevent index corruption across reconfigure and update	2023-07-16 01:45:53 -07:00
Debanjum Singh Solanky	da98b92dd4	Create helper function to test value, order of entries & embeddings This helper should be used to observe if the current embeddings are stable sorted on regenerate and incremental update of index in text search tests	2023-07-16 01:45:53 -07:00
Debanjum Singh Solanky	7ad96036b0	Improve lock name to config_lock instead of search_index_lock It is used to lock updates to all app config state, including processor	2023-07-16 01:45:53 -07:00
Debanjum Singh Solanky	58d86d7876	Use single func to configure server via API and on server start Improve error messages on failure to configure server components	2023-07-16 01:45:53 -07:00
sabaimran	a15711e635	Fix null type checks in get /config	2023-07-15 15:53:56 -07:00
sabaimran	e590d75b20	Start Khoj even when config is not valid (#320 ) * Add icon to indicate bad config, start Khoj even if there was an issue setting up the index	2023-07-15 14:11:54 -07:00
sabaimran	49ab201c30	Fix issues importing PySide in Docker container (#322 ) * Rather than installing PyQT dependencies, remove codepaths that require pyqt files in no-gui mode	2023-07-15 13:33:13 -07:00
sabaimran	ba47f2ab39	Merge branch 'master' of github.com:debanjum/khoj	2023-07-14 22:28:05 -07:00
sabaimran	874cffd256	Add additional support for parsing notion workspaces	2023-07-14 22:27:56 -07:00
Debanjum	52f68167ce	Merge pull request #317 from khoj-ai/reduce-memory-consumption-by-search-model-duplication Reuse Search Models across Content Types to reduce Memory Consumption - Memory consumption now only scales with search models used, not with content types. Previously each content type had it's own copy of the search ML models. That'd result in 300+ Mb per enabled text content type - Split model state into 2 separate state objects, `search_models` and `content_index`. This allows loading text_search and image_search models first and then reusing them across all content_types in content_index - The change should cut down memory utilization quite a bit for most users. I see a >50% drop in memory utilization on my Khoj instance. But this will vary for each user based on the amount of content indexed vs number of plugins enabled. - This change does not solve the RAM utilization scaling with size of the index, as the whole content index is still kept in RAM while Khoj is running Should help with #195, #301 and #303	2023-07-14 19:54:12 -07:00
Debanjum Singh Solanky	f08e9539f1	Release lock after updating index even if update fails to prevent deadlock Wrap acquire/release locks in try/catch/finally when updating content index and search models to prevent lock not being released on error and causing a deadlock	2023-07-14 16:57:27 -07:00
sabaimran	37f7f9fd1d	Add additional telemetry for system understanding (#316 ) * Add additional telemetry in order to understand which data sources are the most useful * Make actions side by side in the configuration page * Restore main run command * Update links to point to wiki pages for Github, Notion integrations * Standardize nomenclature of the api_type to use _config suffix Remove header fields that aren't actually helpful for understanding config usage	2023-07-14 10:14:07 -07:00
Debanjum Singh Solanky	b9fb656657	Update Tests to setup both content_index, search_models before testing This is required by the updated structure of Khoj setup - Add content_config pytest fixture, pass bi_encoder from search_models.[text\|image]_search	2023-07-14 01:29:48 -07:00
Debanjum Singh Solanky	86e2bec9a0	Reuse Search Models across Content Types to Reduce Memory Consumption - Memory consumption now only scales with search models used, not with content types as well. Previously each content type had it's own copy of the search ML models. That'd result in 300+ Mb per enabled content type - Split model state into 2 separate state objects, `search_models' and `content_index'. This allows loading text_search and image_search models first and then reusing them across all content_types in content_index - This should cut down memory utilization quite a bit for most users. I see a ~50% drop in memory utilization. This will, of course, vary for each user based on the amount of content indexed vs number of plugins enabled - This does not solve the RAM utilization scaling with size of the index. As the whole content index is still kept in RAM while Khoj is running Should help with #195, #301 and #303	2023-07-14 01:27:22 -07:00
sabaimran	c2249eadb2	Add a Github workflow that allows you to build dev versions of Desktop applications (#309 ) * Add a Github workflow that allows you to build dev versions of Desktop applications * Add pull_request trigger for testing * Fix errant open quote in Package Khoj App step * Nix the release step, since this isn't associated with any tags - Set retention period for uploaded artifacts to 1 day * Remove pull_request trigger - limit to manual triggers and pushes to master	2023-07-13 22:11:39 -07:00
Debanjum	b2718d330c	Merge pull request #304 from migrate-from-pyqt-to-pyside Migrate from PyQT6 to PySide6	2023-07-13 11:54:47 -07:00
sabaimran	31e933207f	Set default values for sys.stdout if they're unavailable	2023-07-12 22:22:49 -07:00
Debanjum Singh Solanky	9c76150895	Migrate from PyQT6 to PySide6	2023-07-11 18:43:44 -07:00
Debanjum	83ed8561ee	Reduce size of Docker image and build it from local code - Improvements - Install Khoj on Docker from local code instead of pulling from Github - Reduce Khoj Docker image size by 2Gb by not caching installed pip packages. Refer [issue comment](https://github.com/khoj-ai/khoj/issues/148#issuecomment-1627443570)	2023-07-11 01:30:06 -07:00
HyunggyuJang	88c42b3043	Encode data as utf-8 otherwise it will complain, see `1c85531090`	2023-07-11 17:06:05 +09:00
Debanjum Singh Solanky	6308388dfc	Install Khoj on Docker from local app instead of pulling from github Just use a random static version for Khoj on the Docker as otherwise the hatch vcs dynamic versioning requires the .git directory in the docker image too	2023-07-11 00:41:05 -07:00
Debanjum Singh Solanky	802472cd99	Reduce Khoj Docker image size by 2Gb by not caching pip packages Resolve #148	2023-07-10 23:27:02 -07:00
Debanjum Singh Solanky	f664a74e77	Update Khoj server to run on non standard port, 42110 instead of 8000 Resolves #295	2023-07-10 21:27:58 -07:00
Debanjum Singh Solanky	bfd516c1a4	Deprecate (unmaintained) support to setup Khoj via Conda	2023-07-10 21:27:58 -07:00
Debanjum Singh Solanky	58c2c3b71a	Add Documentation to Release Khoj	2023-07-10 21:27:58 -07:00
sabaimran	effb52f859	Fix demo rendering with the new header	2023-07-10 21:16:19 -07:00
				`@@ -0,0 +1 @@`
				<svg xmlns="http://www.w3.org/2000/svg" shape-rendering="geometricPrecision" text-rendering="geometricPrecision" image-rendering="optimizeQuality" fill-rule="evenodd" clip-rule="evenodd" viewBox="0 0 512 512"><path fill-rule="nonzero" d="M256 0c70.69 0 134.7 28.66 181.02 74.98C483.34 121.31 512 185.31 512 256c0 70.69-28.66 134.7-74.98 181.02C390.7 483.34 326.69 512 256 512c-70.69 0-134.69-28.66-181.02-74.98C28.66 390.7 0 326.69 0 256c0-70.69 28.66-134.69 74.98-181.02C121.31 28.66 185.31 0 256 0zm-21.49 301.51v-2.03c.16-13.46 1.48-24.12 4.07-32.05 2.54-7.92 6.19-14.37 10.97-19.25 4.77-4.92 10.51-9.39 17.22-13.46 4.31-2.74 8.22-5.78 11.68-9.18 3.45-3.36 6.19-7.27 8.23-11.69 2.02-4.37 3.04-9.24 3.04-14.62 0-6.4-1.52-11.94-4.57-16.66-3-4.68-7.06-8.28-12.04-10.87-5.03-2.54-10.61-3.81-16.76-3.81-5.53 0-10.81 1.11-15.89 3.45-5.03 2.29-9.25 5.89-12.55 10.77-3.3 4.87-5.23 11.12-5.74 18.74h-32.91c.51-12.95 3.81-23.92 9.85-32.91 6.1-8.99 14.13-15.8 24.08-20.42 10.01-4.62 21.08-6.9 33.16-6.9 13.31 0 24.89 2.43 34.84 7.41 9.96 4.93 17.73 11.83 23.27 20.67 5.48 8.84 8.28 19.1 8.28 30.88 0 8.08-1.27 15.34-3.81 21.79-2.54 6.45-6.1 12.24-10.77 17.27-4.68 5.08-10.21 9.54-16.71 13.41-6.15 3.86-11.12 7.82-14.88 11.93-3.81 4.11-6.56 8.99-8.28 14.58-1.73 5.63-2.69 12.59-2.84 20.92v2.03h-30.94zm16.36 65.82c-5.94-.04-11.02-2.13-15.29-6.35-4.26-4.21-6.35-9.34-6.35-15.33 0-5.89 2.09-10.97 6.35-15.19 4.27-4.21 9.35-6.35 15.29-6.35 5.84 0 10.92 2.14 15.18 6.35 4.32 4.22 6.45 9.3 6.45 15.19 0 3.96-1.01 7.62-2.99 10.87-1.98 3.3-4.57 5.94-7.82 7.87-3.25 1.93-6.86 2.9-10.82 2.94zM417.71 94.29C376.33 52.92 319.15 27.32 256 27.32c-63.15 0-120.32 25.6-161.71 66.97C52.92 135.68 27.32 192.85 27.32 256c0 63.15 25.6 120.33 66.97 161.71 41.39 41.37 98.56 66.97 161.71 66.97 63.15 0 120.33-25.6 161.71-66.97 41.37-41.38 66.97-98.56 66.97-161.71 0-63.15-25.6-120.32-66.97-161.71z"/></svg>