#####################################
# NNTP server configuration details #
#####################################

# Replace the placeholder host and credentials below with the details supplied
# by your news provider.
# NOTE(review): with nntp_server_use_ssl disabled, the user name and password
# are presumably sent unencrypted. NNTP over TLS conventionally uses port 563
# rather than 119 - confirm with your provider before enabling it.
nntp_server_host: my.new.server.example.com
nntp_server_port: 119
nntp_server_user: someuser
nntp_server_pass: changeme
nntp_server_use_ssl: no

#################
# Path settings #
#################

# Database location (relative or absolute path).
database_path: ./db

####################
# Storage settings #
####################

# A higher tree depth creates more search tokens, so it improves the speed and
# likelihood of finding search results, at the cost of extra storage
# requirements, more files, and slower indexing.
max_tree_depth: 10

################################
# Parallel processing settings #
################################

# If you're processing headers faster than you can pull them down over the
# network, you're likely not going to need more than 1 or 2 threads, but
# otherwise, more threads can help. The batch size should be large enough such
# that all configured threads have enough work to do.
#
# The higher your max_tree_depth, the more likely you'll need to increase
# this.
max_threads: 16
batch_size: 10000

#############################
# Newsgroup filter settings #
#############################

# List one or more newsgroup regular expressions to include or exclude from
# being indexed. Blacklisted patterns take precedence over whitelisted patterns.
# These options may be repeated to include additional blacklist/whitelist
# regular expressions.

# If filter_newsgroup_whitelist is set, only newsgroups matching the configured
# regular expressions will be included in indexing.
# If not set, all of usenet will be indexed (with the exception of
# filter_newsgroup_blacklist groups).
# NOTE(review): the trailing "\.*" in the second pattern means "zero or more
# literal dots"; if the intent is "this group and its subgroups", "(\..*)?$"
# may be what was meant. Confirm how the indexer applies these patterns before
# changing it.
filter_newsgroup_whitelist: ^alt\.bible$
filter_newsgroup_whitelist: ^borland\.public\.cppbuilder\.*

# filter_newsgroup_blacklist allows you to exclude newsgroups from being
# indexed, whether filter_newsgroup_whitelist is set or not.
filter_newsgroup_blacklist: .*binaries.*

########################
# Word filter settings #
########################

# It's important to filter out commonly used words to avoid blowing up an index
# in size. Huge indexes are going to eat a lot of disk space and slow down
# searches.

# This setting lists all substrings that should be erased from subjects and
# search strings before they are tokenized. For instance, you might not want to
# store all results for the word "the", or "in" and other stopwords.
# The list of strings is comma-separated and case-insensitive. Each subsequent
# option appends to the previously defined list.
# filter_erase_subtoken: the,by

# This setting lets you list all tokens that will only be indexed on direct
# (whole string) matches. Each token is comma-separated, and the configuration
# option may be listed multiple times as well; each subsequent option appends to
# the previously defined list. All tokens are case-insensitive.
filter_no_subtoken: a,about,actually,almost,also,although,always,am,an,and
filter_no_subtoken: any,are,as,at,be,became,become,but,by,can,could,did,do
filter_no_subtoken: does,each,either,else,for,from,had,has,have,hence,how
filter_no_subtoken: i,if,in,is,it,its,just,may,maybe,me,might,mine,must,my
filter_no_subtoken: neither,nor,not,of,oh,ok,the,to,when,where
filter_no_subtoken: whereas,wherever,whenever,whether,which,while,who,whom
filter_no_subtoken: whoever,whose,why,will,with,within,without,would,yes
filter_no_subtoken: yet,you,your

# Sets the minimum number of words in a sub-token. You may use this if you don't
# want to index single words unless they are a direct match to the subject (in
# which case, you'd set this to a minimum of 2 words) - or you may even want a
# higher minimum than that if you're really wanting to optimize search speed and
# disk usage.
minimum_subtoken_words: 2