diff --git a/default.nix b/default.nix index 60dbceb..738dcf5 100644 --- a/default.nix +++ b/default.nix @@ -406,36 +406,52 @@ in ''; }; - minSize = mkOption { - type = types.ints.between 3 1000; - default = 3; - description = "Minimum size of search terms"; - }; - memoryLimit = mkOption { - type = types.nullOr types.int; - default = null; - example = 2000; - description = "Memory limit for the indexer process, in MiB. If null, leaves the default (which is rather low), and if 0, no limit."; + languages = mkOption { + type = types.nonEmptyListOf types.str; + default = [ "en" ]; + example = [ "en" "de" ]; + description = '' + A list of languages that the full text search should detect. + At least one language must be specified. + The language listed first is the default and is used when language recognition fails. + See the Dovecot documentation for the `fts_languages` setting. + ''; }; - maintenance = { - enable = mkOption { - type = types.bool; - default = true; - description = "Regularly optmize indices, as recommended by upstream."; - }; + substringSearch = mkOption { + type = types.bool; + default = false; + description = '' + If enabled, allows substring searches. + See the Dovecot documentation for the `fts_flatcurve_substring_search` setting. + ''; + }; - onCalendar = mkOption { - type = types.str; - default = "daily"; - description = "When to run the maintenance job. See systemd.time(7) for more information about the format."; - }; + headerExcludes = mkOption { + type = types.listOf types.str; + default = [ + "Received" + "DKIM-*" + "X-*" + "Comments" + ]; + description = '' + The list of headers to exclude. + See the Dovecot documentation for the `fts_header_excludes` setting. + ''; + }; - randomizedDelaySec = mkOption { - type = types.int; - default = 1000; - description = "Run the maintenance job not exactly at the time specified with `onCalendar`, but plus or minus this many seconds."; - }; + filters = mkOption { + type = types.listOf types.str; + default = [ + "normalizer-icu" + "snowball" + "stopwords" + ]; + description = '' + The list of filters to apply. + See the Dovecot documentation for the `fts_filters` setting.
+ ''; }; }; @@ -1289,6 +1305,21 @@ in }; imports = [ + (lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "memoryLimit" ] '' + This option is not needed for fts-flatcurve + '') + (lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maintenance" "enable" ] '' + This option is not needed for fts-flatcurve + '') + (lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maintenance" "onCalendar" ] '' + This option is not needed for fts-flatcurve + '') + (lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maintenance" "randomizedDelaySec" ] '' + This option is not needed for fts-flatcurve + '') + (lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "minSize" ] '' + This option is not supported by fts-flatcurve + '') (lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maxSize" ] '' This option is not needed since fts-xapian 1.8.3 '') diff --git a/docs/fts.rst b/docs/fts.rst index 780ae3e..965b72e 100644 --- a/docs/fts.rst +++ b/docs/fts.rst @@ -4,7 +4,7 @@ Full text search By default, when your IMAP client searches for an email containing some text in its *body*, dovecot will read all your email sequentially. This is very slow and IO intensive. To speed body searches up, it is possible to -*index* emails with a plugin to dovecot, ``fts_xapian``. +*index* emails with a plugin to dovecot, ``fts_flatcurve``. Enabling full text search ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -50,16 +50,15 @@ recommended to move indices in a different location, such as Indexation itself is rather resouces intensive, in CPU, and for emails with large headers, in memory as well. Initial indexation of existing emails can take -hours. If the indexer worker is killed or segfaults during indexation, it can -be that it tried to allocate more memory than allowed. You can increase the memory -limit by eg ``mailserver.fullTextSearch.memoryLimit = 2000`` (in MiB). +hours. 
Mitigating resources requirements ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You can: -* increase the minimum search term size ``mailserver.fullTextSearch.minSize`` +* exclude some headers from indexation with ``mailserver.fullTextSearch.headerExcludes`` +* disable expensive token normalisation in ``mailserver.fullTextSearch.filters`` * disable automatic indexation for some folders with ``mailserver.fullTextSearch.autoIndexExclude``. Folders can be specified by name (``"Trash"``), by special use (``"\\Junk"``) or with a wildcard. diff --git a/mail-server/dovecot.nix b/mail-server/dovecot.nix index 8e6d2b2..baa3a7e 100644 --- a/mail-server/dovecot.nix +++ b/mail-server/dovecot.nix @@ -27,6 +27,12 @@ let # This file contains the ldap bind password ldapConfFile = "${passwdDir}/dovecot-ldap.conf.ext"; bool2int = x: if x then "1" else "0"; + boolToYesNo = x: if x then "yes" else "no"; + listToLine = lib.concatStringsSep " "; + listToMultiAttrs = keyPrefix: attrs: lib.listToAttrs (lib.imap1 (n: x: { + name = "${keyPrefix}${if n==1 then "" else toString n}"; + value = x; + }) attrs); maildirLayoutAppendix = lib.optionalString cfg.useFsLayout ":LAYOUT=fs"; maildirUTF8FolderNames = lib.optionalString cfg.useUTF8FolderNames ":UTF-8"; @@ -145,10 +151,22 @@ let dovecotModules = [ pkgs.dovecot_pigeonhole - ] ++ lib.optional cfg.fullTextSearch.enable pkgs.dovecot_fts_xapian; + ] ++ lib.optional cfg.fullTextSearch.enable pkgs.dovecot-fts-flatcurve; # Remove and assume `false` after NixOS 25.05 haveDovecotModulesOption = options.services.dovecot2 ? 
"modules" && (options.services.dovecot2.modules.visible or true); + ftsPluginSettings = { + fts = "flatcurve"; + fts_languages = listToLine cfg.fullTextSearch.languages; + fts_tokenizers = listToLine [ "generic" "email-address" ]; + fts_tokenizer_email_address = "maxlen=100"; # default 254 too large for Xapian + fts_flatcurve_substring_search = boolToYesNo cfg.fullTextSearch.substringSearch; + fts_filters = listToLine cfg.fullTextSearch.filters; + fts_header_excludes = listToLine cfg.fullTextSearch.headerExcludes; + fts_autoindex = boolToYesNo cfg.fullTextSearch.autoIndex; + fts_enforced = cfg.fullTextSearch.enforced; + } // (listToMultiAttrs "fts_autoindex_exclude" cfg.fullTextSearch.autoIndexExclude); + in { config = with cfg; lib.mkIf enable { @@ -183,14 +201,17 @@ in sslServerCert = certificatePath; sslServerKey = keyPath; enableLmtp = true; - mailPlugins.globally.enable = lib.optionals cfg.fullTextSearch.enable [ "fts" "fts_xapian" ]; + mailPlugins.globally.enable = lib.optionals cfg.fullTextSearch.enable [ + "fts" + "fts_flatcurve" + ]; protocols = lib.optional cfg.enableManageSieve "sieve"; pluginSettings = { sieve = "file:${cfg.sieveDirectory}/%{user}/scripts;active=${cfg.sieveDirectory}/%{user}/active.sieve"; sieve_default = "file:${cfg.sieveDirectory}/%{user}/default.sieve"; sieve_default_name = "default"; - }; + } // (lib.optionalAttrs cfg.fullTextSearch.enable ftsPluginSettings); sieve = { extensions = [ @@ -358,27 +379,6 @@ in inbox = yes } - ${lib.optionalString cfg.fullTextSearch.enable '' - plugin { - plugin = fts fts_xapian - fts = xapian - fts_xapian = partial=${toString cfg.fullTextSearch.minSize} verbose=${bool2int cfg.debug} - - fts_autoindex = ${if cfg.fullTextSearch.autoIndex then "yes" else "no"} - - ${lib.strings.concatImapStringsSep "\n" (n: x: "fts_autoindex_exclude${if n==1 then "" else toString n} = ${x}") cfg.fullTextSearch.autoIndexExclude} - - fts_enforced = ${cfg.fullTextSearch.enforced} - } - - service indexer-worker { - 
${lib.optionalString (cfg.fullTextSearch.memoryLimit != null) '' - vsz_limit = ${toString (cfg.fullTextSearch.memoryLimit*1024*1024)} - ''} - process_limit = 0 - } - ''} - lda_mailbox_autosubscribe = yes lda_mailbox_autocreate = yes ''; @@ -395,29 +395,5 @@ in }; systemd.services.postfix.restartTriggers = [ genPasswdScript ] ++ (lib.optional cfg.ldap.enable [setPwdInLdapConfFile]); - - systemd.services.dovecot-fts-xapian-optimize = lib.mkIf (cfg.fullTextSearch.enable && cfg.fullTextSearch.maintenance.enable) { - description = "Optimize dovecot indices for fts_xapian"; - requisite = [ "dovecot2.service" ]; - after = [ "dovecot2.service" ]; - startAt = cfg.fullTextSearch.maintenance.onCalendar; - serviceConfig = { - Type = "oneshot"; - ExecStart = "${pkgs.dovecot}/bin/doveadm fts optimize -A"; - PrivateDevices = true; - PrivateNetwork = true; - ProtectKernelTunables = true; - ProtectKernelModules = true; - ProtectControlGroups = true; - ProtectHome = true; - ProtectSystem = true; - PrivateTmp = true; - }; - }; - systemd.timers.dovecot-fts-xapian-optimize = lib.mkIf (cfg.fullTextSearch.enable && cfg.fullTextSearch.maintenance.enable && cfg.fullTextSearch.maintenance.randomizedDelaySec != 0) { - timerConfig = { - RandomizedDelaySec = cfg.fullTextSearch.maintenance.randomizedDelaySec; - }; - }; }; } diff --git a/tests/external.nix b/tests/external.nix index 7579b6d..7dea601 100644 --- a/tests/external.nix +++ b/tests/external.nix @@ -81,8 +81,6 @@ pkgs.nixosTest { # special use depends on https://github.com/NixOS/nixpkgs/pull/93201 autoIndexExclude = [ (if (pkgs.lib.versionAtLeast pkgs.lib.version "21") then "\\Junk" else "Junk") ]; enforced = "yes"; - # fts-xapian warns when memory is low, which makes the test fail - memoryLimit = 100000; }; }; }; @@ -493,11 +491,9 @@ pkgs.nixosTest { # should fail because this folder is not indexed client.fail("search Junk a >&2") # check that search really goes through the indexer - server.succeed( - "journalctl -u dovecot2 | grep -E 
'indexer-worker.* Done indexing .INBOX.' >&2" - ) + server.succeed("journalctl -u dovecot2 | grep 'fts-flatcurve(INBOX): Query ' >&2") # check that Junk is not indexed - server.fail("journalctl -u dovecot2 | grep 'indexer-worker' | grep -i 'JUNK' >&2") + server.fail("journalctl -u dovecot2 | grep -i 'fts-flatcurve(JUNK): Indexing ' >&2") with subtest("dmarc reporting"): server.systemctl("start rspamd-dmarc-reporter.service") @@ -507,8 +503,6 @@ pkgs.nixosTest { server.fail("journalctl -u postfix | grep -i warning >&2") server.fail("journalctl -u dovecot2 | grep -v 'imap-login: Debug: SSL error: Connection closed' | grep -i error >&2") # harmless ? https://dovecot.org/pipermail/dovecot/2020-August/119575.html - server.fail( - "journalctl -u dovecot2 |grep -v 'Expunged message reappeared, giving a new UID'| grep -v 'FTS Xapian: Box is empty' | grep -vE 'FTS Xapian:.*does not exist. Creating it' | grep -i warning >&2" - ) + server.fail("journalctl -u dovecot2 | grep -v 'Expunged message reappeared, giving a new UID' | grep -i warning >&2") ''; }