diff --git a/default.nix b/default.nix index 60dbceb..738dcf5 100644 --- a/default.nix +++ b/default.nix @@ -406,36 +406,52 @@ in ''; }; - minSize = mkOption { - type = types.ints.between 3 1000; - default = 3; - description = "Minimum size of search terms"; - }; - memoryLimit = mkOption { - type = types.nullOr types.int; - default = null; - example = 2000; - description = "Memory limit for the indexer process, in MiB. If null, leaves the default (which is rather low), and if 0, no limit."; + languages = mkOption { + type = types.nonEmptyListOf types.str; + default = [ "en" ]; + example = [ "en" "de" ]; + description = '' + A list of languages that the full text search should detect. + At least one language must be specified. + The language listed first is the default and is used when language recognition fails. + See . + ''; }; - maintenance = { - enable = mkOption { - type = types.bool; - default = true; - description = "Regularly optmize indices, as recommended by upstream."; - }; + substringSearch = mkOption { + type = types.bool; + default = false; + description = '' + If enabled, allows substring searches. + See . + ''; + }; - onCalendar = mkOption { - type = types.str; - default = "daily"; - description = "When to run the maintenance job. See systemd.time(7) for more information about the format."; - }; + headerExcludes = mkOption { + type = types.listOf types.str; + default = [ + "Received" + "DKIM-*" + "X-*" + "Comments" + ]; + description = '' + The list of headers to exclude. + See . + ''; + }; - randomizedDelaySec = mkOption { - type = types.int; - default = 1000; - description = "Run the maintenance job not exactly at the time specified with `onCalendar`, but plus or minus this many seconds."; - }; + filters = mkOption { + type = types.listOf types.str; + default = [ + "normalizer-icu" + "snowball" + "stopwords" + ]; + description = '' + The list of filters to apply. + . 
+ ''; }; }; @@ -1289,6 +1305,21 @@ in }; imports = [ + (lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "memoryLimit" ] '' + This option is not needed for fts-flatcurve + '') + (lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maintenance" "enable" ] '' + This option is not needed for fts-flatcurve + '') + (lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maintenance" "onCalendar" ] '' + This option is not needed for fts-flatcurve + '') + (lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maintenance" "randomizedDelaySec" ] '' + This option is not needed for fts-flatcurve + '') + (lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "minSize" ] '' + This option is not supported by fts-flatcurve + '') (lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maxSize" ] '' This option is not needed since fts-xapian 1.8.3 '') diff --git a/docs/fts.rst b/docs/fts.rst index 780ae3e..965b72e 100644 --- a/docs/fts.rst +++ b/docs/fts.rst @@ -4,7 +4,7 @@ Full text search By default, when your IMAP client searches for an email containing some text in its *body*, dovecot will read all your email sequentially. This is very slow and IO intensive. To speed body searches up, it is possible to -*index* emails with a plugin to dovecot, ``fts_xapian``. +*index* emails with a plugin to dovecot, ``fts_flatcurve``. Enabling full text search ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -50,16 +50,15 @@ recommended to move indices in a different location, such as Indexation itself is rather resouces intensive, in CPU, and for emails with large headers, in memory as well. Initial indexation of existing emails can take -hours. If the indexer worker is killed or segfaults during indexation, it can -be that it tried to allocate more memory than allowed. You can increase the memory -limit by eg ``mailserver.fullTextSearch.memoryLimit = 2000`` (in MiB). +hours. 
Mitigating resources requirements ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You can: -* increase the minimum search term size ``mailserver.fullTextSearch.minSize`` +* exclude some headers from indexation with ``mailserver.fullTextSearch.headerExcludes`` +* disable expensive token normalisation in ``mailserver.fullTextSearch.filters`` * disable automatic indexation for some folders with ``mailserver.fullTextSearch.autoIndexExclude``. Folders can be specified by name (``"Trash"``), by special use (``"\\Junk"``) or with a wildcard. diff --git a/docs/release-notes.rst b/docs/release-notes.rst index 806de8e..56e25b7 100644 --- a/docs/release-notes.rst +++ b/docs/release-notes.rst @@ -1,6 +1,21 @@ Release Notes ============= +NixOS 25.05 +----------- + +- Switch to the more efficient ``fts-flatcurve`` indexer for full text search + (`merge request `__). + This makes use of a new index, which will be automatically re-generated the + next time a folder is searched. + The operation is now quick enough to be performed "just-in-time". + Alternatively, all indices can be immediately re-generated for all users and + folders by running + ``doveadm fts rescan -u '*' && doveadm index -u '*' -q '*'``. + The previous index (which is not automatically discarded to allow rollbacks) + can be cleaned up by removing all the ``xapian-indexes`` directories within + ``mailserver.indexDir``. 
+ NixOS 24.11 ----------- diff --git a/flake.lock b/flake.lock index c6ec247..0bb033a 100644 --- a/flake.lock +++ b/flake.lock @@ -19,11 +19,11 @@ "flake-compat": { "flake": false, "locked": { - "lastModified": 1696426674, - "narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=", + "lastModified": 1733328505, + "narHash": "sha256-NeCCThCEP3eCl2l/+27kNNK7QrwZB1IJCrXfrbv5oqU=", "owner": "edolstra", "repo": "flake-compat", - "rev": "0f9255e01c2351cc7d116c072cb317785dd33b33", + "rev": "ff81ac966bb2cae68946d5ed5fc4994f96d0ffec", "type": "github" }, "original": { @@ -34,11 +34,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1732014248, - "narHash": "sha256-y/MEyuJ5oBWrWAic/14LaIr/u5E0wRVzyYsouYY3W6w=", + "lastModified": 1739214665, + "narHash": "sha256-26L8VAu3/1YRxS8MHgBOyOM8xALdo6N0I04PgorE7UM=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "23e89b7da85c3640bbc2173fe04f4bd114342367", + "rev": "64e75cd44acf21c7933d61d7721e812eac1b5a0a", "type": "github" }, "original": { @@ -49,11 +49,11 @@ }, "nixpkgs-24_11": { "locked": { - "lastModified": 1734083684, - "narHash": "sha256-5fNndbndxSx5d+C/D0p/VF32xDiJCJzyOqorOYW4JEo=", + "lastModified": 1739357830, + "narHash": "sha256-9xim3nJJUFbVbJCz48UP4fGRStVW5nv4VdbimbKxJ3I=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "314e12ba369ccdb9b352a4db26ff419f7c49fa84", + "rev": "0ff09db9d034a04acd4e8908820ba0b410d7a33a", "type": "github" }, "original": { diff --git a/mail-server/dovecot.nix b/mail-server/dovecot.nix index 8e6d2b2..baa3a7e 100644 --- a/mail-server/dovecot.nix +++ b/mail-server/dovecot.nix @@ -27,6 +27,12 @@ let # This file contains the ldap bind password ldapConfFile = "${passwdDir}/dovecot-ldap.conf.ext"; bool2int = x: if x then "1" else "0"; + boolToYesNo = x: if x then "yes" else "no"; + listToLine = lib.concatStringsSep " "; + listToMultiAttrs = keyPrefix: attrs: lib.listToAttrs (lib.imap1 (n: x: { + name = "${keyPrefix}${if n==1 then "" else toString n}"; + value = x; + }) attrs); 
maildirLayoutAppendix = lib.optionalString cfg.useFsLayout ":LAYOUT=fs"; maildirUTF8FolderNames = lib.optionalString cfg.useUTF8FolderNames ":UTF-8"; @@ -145,10 +151,22 @@ let dovecotModules = [ pkgs.dovecot_pigeonhole - ] ++ lib.optional cfg.fullTextSearch.enable pkgs.dovecot_fts_xapian; + ] ++ lib.optional cfg.fullTextSearch.enable pkgs.dovecot-fts-flatcurve; # Remove and assume `false` after NixOS 25.05 haveDovecotModulesOption = options.services.dovecot2 ? "modules" && (options.services.dovecot2.modules.visible or true); + ftsPluginSettings = { + fts = "flatcurve"; + fts_languages = listToLine cfg.fullTextSearch.languages; + fts_tokenizers = listToLine [ "generic" "email-address" ]; + fts_tokenizer_email_address = "maxlen=100"; # default 254 too large for Xapian + fts_flatcurve_substring_search = boolToYesNo cfg.fullTextSearch.substringSearch; + fts_filters = listToLine cfg.fullTextSearch.filters; + fts_header_excludes = listToLine cfg.fullTextSearch.headerExcludes; + fts_autoindex = boolToYesNo cfg.fullTextSearch.autoIndex; + fts_enforced = cfg.fullTextSearch.enforced; + } // (listToMultiAttrs "fts_autoindex_exclude" cfg.fullTextSearch.autoIndexExclude); + in { config = with cfg; lib.mkIf enable { @@ -183,14 +201,17 @@ in sslServerCert = certificatePath; sslServerKey = keyPath; enableLmtp = true; - mailPlugins.globally.enable = lib.optionals cfg.fullTextSearch.enable [ "fts" "fts_xapian" ]; + mailPlugins.globally.enable = lib.optionals cfg.fullTextSearch.enable [ + "fts" + "fts_flatcurve" + ]; protocols = lib.optional cfg.enableManageSieve "sieve"; pluginSettings = { sieve = "file:${cfg.sieveDirectory}/%{user}/scripts;active=${cfg.sieveDirectory}/%{user}/active.sieve"; sieve_default = "file:${cfg.sieveDirectory}/%{user}/default.sieve"; sieve_default_name = "default"; - }; + } // (lib.optionalAttrs cfg.fullTextSearch.enable ftsPluginSettings); sieve = { extensions = [ @@ -358,27 +379,6 @@ in inbox = yes } - ${lib.optionalString cfg.fullTextSearch.enable '' - 
plugin { - plugin = fts fts_xapian - fts = xapian - fts_xapian = partial=${toString cfg.fullTextSearch.minSize} verbose=${bool2int cfg.debug} - - fts_autoindex = ${if cfg.fullTextSearch.autoIndex then "yes" else "no"} - - ${lib.strings.concatImapStringsSep "\n" (n: x: "fts_autoindex_exclude${if n==1 then "" else toString n} = ${x}") cfg.fullTextSearch.autoIndexExclude} - - fts_enforced = ${cfg.fullTextSearch.enforced} - } - - service indexer-worker { - ${lib.optionalString (cfg.fullTextSearch.memoryLimit != null) '' - vsz_limit = ${toString (cfg.fullTextSearch.memoryLimit*1024*1024)} - ''} - process_limit = 0 - } - ''} - lda_mailbox_autosubscribe = yes lda_mailbox_autocreate = yes ''; @@ -395,29 +395,5 @@ in }; systemd.services.postfix.restartTriggers = [ genPasswdScript ] ++ (lib.optional cfg.ldap.enable [setPwdInLdapConfFile]); - - systemd.services.dovecot-fts-xapian-optimize = lib.mkIf (cfg.fullTextSearch.enable && cfg.fullTextSearch.maintenance.enable) { - description = "Optimize dovecot indices for fts_xapian"; - requisite = [ "dovecot2.service" ]; - after = [ "dovecot2.service" ]; - startAt = cfg.fullTextSearch.maintenance.onCalendar; - serviceConfig = { - Type = "oneshot"; - ExecStart = "${pkgs.dovecot}/bin/doveadm fts optimize -A"; - PrivateDevices = true; - PrivateNetwork = true; - ProtectKernelTunables = true; - ProtectKernelModules = true; - ProtectControlGroups = true; - ProtectHome = true; - ProtectSystem = true; - PrivateTmp = true; - }; - }; - systemd.timers.dovecot-fts-xapian-optimize = lib.mkIf (cfg.fullTextSearch.enable && cfg.fullTextSearch.maintenance.enable && cfg.fullTextSearch.maintenance.randomizedDelaySec != 0) { - timerConfig = { - RandomizedDelaySec = cfg.fullTextSearch.maintenance.randomizedDelaySec; - }; - }; }; } diff --git a/tests/external.nix b/tests/external.nix index 7579b6d..7dea601 100644 --- a/tests/external.nix +++ b/tests/external.nix @@ -81,8 +81,6 @@ pkgs.nixosTest { # special use depends on 
https://github.com/NixOS/nixpkgs/pull/93201 autoIndexExclude = [ (if (pkgs.lib.versionAtLeast pkgs.lib.version "21") then "\\Junk" else "Junk") ]; enforced = "yes"; - # fts-xapian warns when memory is low, which makes the test fail - memoryLimit = 100000; }; }; }; @@ -493,11 +491,9 @@ pkgs.nixosTest { # should fail because this folder is not indexed client.fail("search Junk a >&2") # check that search really goes through the indexer - server.succeed( - "journalctl -u dovecot2 | grep -E 'indexer-worker.* Done indexing .INBOX.' >&2" - ) + server.succeed("journalctl -u dovecot2 | grep 'fts-flatcurve(INBOX): Query ' >&2") # check that Junk is not indexed - server.fail("journalctl -u dovecot2 | grep 'indexer-worker' | grep -i 'JUNK' >&2") + server.fail("journalctl -u dovecot2 | grep -i 'fts-flatcurve(JUNK): Indexing ' >&2") with subtest("dmarc reporting"): server.systemctl("start rspamd-dmarc-reporter.service") @@ -507,8 +503,6 @@ pkgs.nixosTest { server.fail("journalctl -u postfix | grep -i warning >&2") server.fail("journalctl -u dovecot2 | grep -v 'imap-login: Debug: SSL error: Connection closed' | grep -i error >&2") # harmless ? https://dovecot.org/pipermail/dovecot/2020-August/119575.html - server.fail( - "journalctl -u dovecot2 |grep -v 'Expunged message reappeared, giving a new UID'| grep -v 'FTS Xapian: Box is empty' | grep -vE 'FTS Xapian:.*does not exist. Creating it' | grep -i warning >&2" - ) + server.fail("journalctl -u dovecot2 | grep -v 'Expunged message reappeared, giving a new UID' | grep -i warning >&2") ''; }