From 2ed7a9478284fa122b48632024dc4e7be428ca9f Mon Sep 17 00:00:00 2001 From: euxane Date: Fri, 24 Jan 2025 17:36:40 +0100 Subject: [PATCH 1/4] dovecot/fts: switch to fts-flatcurve This switches the full-text search plugin from fts-xapian to fts-flatcurve, the now preferred indexer still powered by Xapian, which will be integrated into Dovecot core 2.4. This sets a sane minimal configuration for the plugin with international language support. The plugin options marked as "advanced" in Dovecot's documentation aren't re-exposed for simplicity. They can nevertheless be overridden by module consumers by directly setting keys with `services.dovecot2.pluginSettings.fts_*`. The `fullTextSearch.maintenance` option is removed as the index is now incrementally optimised in the background. GitLab: closes https://gitlab.com/simple-nixos-mailserver/nixos-mailserver/-/issues/239 --- default.nix | 98 +++++++++++++++++++++++++++++------------ docs/fts.rst | 5 ++- mail-server/dovecot.nix | 67 ++++++++++------------------ tests/external.nix | 18 ++------ 4 files changed, 101 insertions(+), 87 deletions(-) diff --git a/default.nix b/default.nix index 3f46610..aaa0987 100644 --- a/default.nix +++ b/default.nix @@ -380,7 +380,21 @@ in }; fullTextSearch = { - enable = mkEnableOption "Full text search indexing with xapian. This has significant performance and disk space cost."; + enable = mkEnableOption '' + Full text search indexing with Xapian through the fts_flatcurve plugin. + This has significant performance and disk space cost. + ''; + memoryLimit = mkOption { + type = types.nullOr types.int; + default = null; + example = 2000; + description = '' + Memory limit for the indexer process, in MiB. + If null, leaves the default (which is rather low), + and if 0, no limit. 
+ ''; + }; + autoIndex = mkOption { type = types.bool; default = true; @@ -406,36 +420,54 @@ in ''; }; - minSize = mkOption { - type = types.ints.between 3 1000; - default = 3; - description = "Minimum size of search terms"; - }; - memoryLimit = mkOption { - type = types.nullOr types.int; - default = null; - example = 2000; - description = "Memory limit for the indexer process, in MiB. If null, leaves the default (which is rather low), and if 0, no limit."; + languages = mkOption { + type = types.nonEmptyListOf types.str; + default = [ "en" ]; + example = [ "en" "de" ]; + description = '' + A list of languages that the full text search should detect. + At least one language must be specified. + The language listed first is the default and is used when language recognition fails. + See . + ''; }; - maintenance = { - enable = mkOption { - type = types.bool; - default = true; - description = "Regularly optmize indices, as recommended by upstream."; - }; + substringSearch = mkOption { + type = types.bool; + default = false; + description = '' + If enabled, allows substring searches. + See . - onCalendar = mkOption { - type = types.str; - default = "daily"; - description = "When to run the maintenance job. See systemd.time(7) for more information about the format."; - }; + Enabling this requires significant additional storage space. + ''; + }; - randomizedDelaySec = mkOption { - type = types.int; - default = 1000; - description = "Run the maintenance job not exactly at the time specified with `onCalendar`, but plus or minus this many seconds."; - }; + headerExcludes = mkOption { + type = types.listOf types.str; + default = [ + "Received" + "DKIM-*" + "X-*" + "Comments" + ]; + description = '' + The list of headers to exclude. + See . + ''; + }; + + filters = mkOption { + type = types.listOf types.str; + default = [ + "normalizer-icu" + "snowball" + "stopwords" + ]; + description = '' + The list of filters to apply. + . 
+ ''; }; }; @@ -1269,6 +1301,18 @@ in }; imports = [ + (lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maintenance" "enable" ] '' + This option is not needed for fts-flatcurve + '') + (lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maintenance" "onCalendar" ] '' + This option is not needed for fts-flatcurve + '') + (lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maintenance" "randomizedDelaySec" ] '' + This option is not needed for fts-flatcurve + '') + (lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "minSize" ] '' + This option is not supported by fts-flatcurve + '') (lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maxSize" ] '' This option is not needed since fts-xapian 1.8.3 '') diff --git a/docs/fts.rst b/docs/fts.rst index 780ae3e..bb2fe88 100644 --- a/docs/fts.rst +++ b/docs/fts.rst @@ -4,7 +4,7 @@ Full text search By default, when your IMAP client searches for an email containing some text in its *body*, dovecot will read all your email sequentially. This is very slow and IO intensive. To speed body searches up, it is possible to -*index* emails with a plugin to dovecot, ``fts_xapian``. +*index* emails with a plugin to dovecot, ``fts_flatcurve``. Enabling full text search ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -59,7 +59,8 @@ Mitigating resources requirements You can: -* increase the minimum search term size ``mailserver.fullTextSearch.minSize`` +* exclude some headers from indexation with ``mailserver.fullTextSearch.headerExcludes`` +* disable expensive token normalisation in ``mailserver.fullTextSearch.filters`` * disable automatic indexation for some folders with ``mailserver.fullTextSearch.autoIndexExclude``. Folders can be specified by name (``"Trash"``), by special use (``"\\Junk"``) or with a wildcard. 
diff --git a/mail-server/dovecot.nix b/mail-server/dovecot.nix index 6704426..ee8db25 100644 --- a/mail-server/dovecot.nix +++ b/mail-server/dovecot.nix @@ -26,7 +26,12 @@ let userdbFile = "${passwdDir}/userdb"; # This file contains the ldap bind password ldapConfFile = "${passwdDir}/dovecot-ldap.conf.ext"; - bool2int = x: if x then "1" else "0"; + boolToYesNo = x: if x then "yes" else "no"; + listToLine = lib.concatStringsSep " "; + listToMultiAttrs = keyPrefix: attrs: lib.listToAttrs (lib.imap1 (n: x: { + name = "${keyPrefix}${if n==1 then "" else toString n}"; + value = x; + }) attrs); maildirLayoutAppendix = lib.optionalString cfg.useFsLayout ":LAYOUT=fs"; maildirUTF8FolderNames = lib.optionalString cfg.useUTF8FolderNames ":UTF-8"; @@ -122,10 +127,22 @@ let dovecotModules = [ pkgs.dovecot_pigeonhole - ] ++ lib.optional cfg.fullTextSearch.enable pkgs.dovecot_fts_xapian; + ] ++ lib.optional cfg.fullTextSearch.enable pkgs.dovecot-fts-flatcurve; # Remove and assume `false` after NixOS 25.05 haveDovecotModulesOption = options.services.dovecot2 ? 
"modules" && (options.services.dovecot2.modules.visible or true); + ftsPluginSettings = { + fts = "flatcurve"; + fts_languages = listToLine cfg.fullTextSearch.languages; + fts_tokenizers = listToLine [ "generic" "email-address" ]; + fts_tokenizer_email_address = "maxlen=100"; # default 254 too large for Xapian + fts_flatcurve_substring_search = boolToYesNo cfg.fullTextSearch.substringSearch; + fts_filters = listToLine cfg.fullTextSearch.filters; + fts_header_excludes = listToLine cfg.fullTextSearch.headerExcludes; + fts_autoindex = boolToYesNo cfg.fullTextSearch.autoIndex; + fts_enforced = cfg.fullTextSearch.enforced; + } // (listToMultiAttrs "fts_autoindex_exclude" cfg.fullTextSearch.autoIndexExclude); + in { config = with cfg; lib.mkIf enable { @@ -160,14 +177,17 @@ in sslServerCert = certificatePath; sslServerKey = keyPath; enableLmtp = true; - mailPlugins.globally.enable = lib.optionals cfg.fullTextSearch.enable [ "fts" "fts_xapian" ]; + mailPlugins.globally.enable = lib.optionals cfg.fullTextSearch.enable [ + "fts" + "fts_flatcurve" + ]; protocols = lib.optional cfg.enableManageSieve "sieve"; pluginSettings = { sieve = "file:${cfg.sieveDirectory}/%{user}/scripts;active=${cfg.sieveDirectory}/%{user}/active.sieve"; sieve_default = "file:${cfg.sieveDirectory}/%{user}/default.sieve"; sieve_default_name = "default"; - }; + } // (lib.optionalAttrs cfg.fullTextSearch.enable ftsPluginSettings); sieve = { extensions = [ @@ -341,26 +361,11 @@ in inbox = yes } - ${lib.optionalString cfg.fullTextSearch.enable '' - plugin { - plugin = fts fts_xapian - fts = xapian - fts_xapian = partial=${toString cfg.fullTextSearch.minSize} verbose=${bool2int cfg.debug} - - fts_autoindex = ${if cfg.fullTextSearch.autoIndex then "yes" else "no"} - - ${lib.strings.concatImapStringsSep "\n" (n: x: "fts_autoindex_exclude${if n==1 then "" else toString n} = ${x}") cfg.fullTextSearch.autoIndexExclude} - - fts_enforced = ${cfg.fullTextSearch.enforced} - } - service indexer-worker { 
${lib.optionalString (cfg.fullTextSearch.memoryLimit != null) '' vsz_limit = ${toString (cfg.fullTextSearch.memoryLimit*1024*1024)} ''} - process_limit = 0 } - ''} lda_mailbox_autosubscribe = yes lda_mailbox_autocreate = yes @@ -378,29 +383,5 @@ in }; systemd.services.postfix.restartTriggers = [ genPasswdScript ] ++ (lib.optional cfg.ldap.enable [setPwdInLdapConfFile]); - - systemd.services.dovecot-fts-xapian-optimize = lib.mkIf (cfg.fullTextSearch.enable && cfg.fullTextSearch.maintenance.enable) { - description = "Optimize dovecot indices for fts_xapian"; - requisite = [ "dovecot2.service" ]; - after = [ "dovecot2.service" ]; - startAt = cfg.fullTextSearch.maintenance.onCalendar; - serviceConfig = { - Type = "oneshot"; - ExecStart = "${pkgs.dovecot}/bin/doveadm fts optimize -A"; - PrivateDevices = true; - PrivateNetwork = true; - ProtectKernelTunables = true; - ProtectKernelModules = true; - ProtectControlGroups = true; - ProtectHome = true; - ProtectSystem = true; - PrivateTmp = true; - }; - }; - systemd.timers.dovecot-fts-xapian-optimize = lib.mkIf (cfg.fullTextSearch.enable && cfg.fullTextSearch.maintenance.enable && cfg.fullTextSearch.maintenance.randomizedDelaySec != 0) { - timerConfig = { - RandomizedDelaySec = cfg.fullTextSearch.maintenance.randomizedDelaySec; - }; - }; }; } diff --git a/tests/external.nix b/tests/external.nix index c32a9e1..0f51d46 100644 --- a/tests/external.nix +++ b/tests/external.nix @@ -82,8 +82,6 @@ # special use depends on https://github.com/NixOS/nixpkgs/pull/93201 autoIndexExclude = [ (if (pkgs.lib.versionAtLeast pkgs.lib.version "21") then "\\Junk" else "Junk") ]; enforced = "yes"; - # fts-xapian warns when memory is low, which makes the test fail - memoryLimit = 100000; }; }; }; @@ -493,11 +491,9 @@ # should fail because this folder is not indexed client.fail("search Junk a >&2") # check that search really goes through the indexer - server.succeed( - "journalctl -u dovecot2 | grep -E 'indexer-worker.* Done indexing .INBOX.' 
>&2" - ) + server.succeed("journalctl -u dovecot2 | grep 'fts-flatcurve(INBOX): Query ' >&2") # check that Junk is not indexed - server.fail("journalctl -u dovecot2 | grep 'indexer-worker' | grep -i 'JUNK' >&2") + server.fail("journalctl -u dovecot2 | grep 'fts-flatcurve(JUNK): Indexing ' >&2") with subtest("dmarc reporting"): server.systemctl("start rspamd-dmarc-reporter.service") @@ -507,14 +503,6 @@ server.fail("journalctl -u postfix | grep -i warning >&2") server.fail("journalctl -u dovecot2 | grep -v 'imap-login: Debug: SSL error: Connection closed' | grep -i error >&2") # harmless ? https://dovecot.org/pipermail/dovecot/2020-August/119575.html - server.fail( - "journalctl -u dovecot2 | \ - grep -v 'Expunged message reappeared, giving a new UID' | \ - grep -v 'FTS Xapian: Box is empty' | \ - grep -v 'FTS Xapian: New version of the plugin' | \ - grep -vE 'FTS Xapian:.*does not exist. Creating it' | \ - grep -vE 'FTS Xapian:.*indexes do not exist. Initializing DB' | \ - grep -i warning >&2" - ) + server.fail("journalctl -u dovecot2 | grep -v 'Expunged message reappeared, giving a new UID' | grep -i warning >&2") ''; } From e287d83ab1818ac34b510a58b15dd44012f59c3c Mon Sep 17 00:00:00 2001 From: euxane Date: Thu, 30 Jan 2025 21:06:23 +0100 Subject: [PATCH 2/4] release-notes: mention switch to fts-flatcurve for FTS --- docs/release-notes.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/release-notes.rst b/docs/release-notes.rst index f6511ee..8cee0bd 100644 --- a/docs/release-notes.rst +++ b/docs/release-notes.rst @@ -12,6 +12,17 @@ NixOS 25.05 - If you need to revert TCP connections, configure ``mailserver.redis.address`` to reference the value of ``config.services.redis.servers.rspamd.bind``. - The integration with policyd-spf was removed and SPF handling is now fully based on Rspamd scoring. (`merge request `__) +- Switch to the more efficient `fts-flatcurve` indexer for full text search + (`merge request `__). 
+ This makes use of a new index, which will be automatically re-generated the + next time a folder is searched. + The operation is now quick enough to be performed "just-in-time". + Alternatively, all indices can be immediately re-generated for all users and + folders by running + ``doveadm fts rescan -u '*' && doveadm index -u '*' -q '*'``. + The previous index (which is not automatically discarded to allow rollbacks) + can be cleaned up by removing all the ``xapian-indexes`` directories within + ``mailserver.indexDir``. - Individual domains can now be excluded from DMARC Reporting through ``mailserver.dmarcReporting.excludedDomains``. (`merge request `__) - Configuring ``mailserver.forwards`` is now possible when the setup relies on LDAP. From 0cbdf465e49e4bc6a4625d0610fa73f9b469a39d Mon Sep 17 00:00:00 2001 From: euxane Date: Mon, 19 May 2025 16:36:50 +0200 Subject: [PATCH 3/4] dovecot/fts: warn on stopwords filter with multiple languages --- mail-server/dovecot.nix | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/mail-server/dovecot.nix b/mail-server/dovecot.nix index ee8db25..56cebf2 100644 --- a/mail-server/dovecot.nix +++ b/mail-server/dovecot.nix @@ -153,6 +153,20 @@ in } ]; + warnings = + (lib.optional ( + (builtins.length cfg.fullTextSearch.languages > 1) && + (builtins.elem "stopwords" cfg.fullTextSearch.filters) + ) '' + Using stopwords in `mailserver.fullTextSearch.filters` with multiple + languages in `mailserver.fullTextSearch.languages` configured WILL + cause some searches to fail. + + The recommended solution is to NOT use the stopword filter when + multiple languages are present in the configuration. + '') + ; + # for sieve-test. Shelling it in on demand usually doesnt' work, as it reads # the global config and tries to open shared libraries configured in there, # which are usually not compatible. 
From 826a3b2fcf68173cd93e1f7664dee68e01d6a175 Mon Sep 17 00:00:00 2001 From: euxane Date: Mon, 19 May 2025 17:13:11 +0200 Subject: [PATCH 4/4] tests/external: ignore time adjustments warnings Seems to be happening randomly during tests: dovecot: master: Warning: Time moved forwards by 0.101534 seconds - adjusting timeouts. --- tests/external.nix | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/external.nix b/tests/external.nix index 0f51d46..a65f0ce 100644 --- a/tests/external.nix +++ b/tests/external.nix @@ -503,6 +503,11 @@ server.fail("journalctl -u postfix | grep -i warning >&2") server.fail("journalctl -u dovecot2 | grep -v 'imap-login: Debug: SSL error: Connection closed' | grep -i error >&2") # harmless ? https://dovecot.org/pipermail/dovecot/2020-August/119575.html - server.fail("journalctl -u dovecot2 | grep -v 'Expunged message reappeared, giving a new UID' | grep -i warning >&2") + server.fail( + "journalctl -u dovecot2 | \ + grep -v 'Expunged message reappeared, giving a new UID' | \ + grep -v 'Time moved forwards' | \ + grep -i warning >&2" + ) ''; }