filter.c
Go to the documentation of this file.
1 /* ========================================================================== */
2 /*! \file
3  * \brief Article filtering
4  *
5  * Copyright (c) 2012-2024 by the developers. See the LICENSE file for details.
6  *
7  * If nothing else is specified, functions return zero to indicate success
8  * and a negative value to indicate an error.
9  */
10 
11 
12 /* ========================================================================== */
13 /* Include headers */
14 
15 #include "posix.h" /* Include this first because of feature test macros */
16 
17 #include <string.h>
18 
19 #include "conf.h"
20 #include "core.h"
21 #include "encoding.h"
22 #include "filter.h"
23 #include "fileutils.h"
24 #include "main.h"
25 #include "xdg.h"
26 
27 
28 /* ========================================================================== */
29 /*! \defgroup FILTER FILTER: Regular expressions and scoring
30  *
31  * Location of scorefile: \c $XDG_CONFIG_HOME/$CFG_NAME/scorefile
32  *
33  * Any line starting with \c # is treated as a comment (not parsed and ignored).
34  * All other lines are parsed as rules with 4 colon-separated fields:
35  * - Field 1: Group wildmat (the rule takes effect for matching groups only)
36  * - Field 2: Type (indicating the target element and matching method)
37  * - Field 3: Score (signed integer value)
38  * - Field 4: String
39  * No whitespace is allowed between the fields and separators.
40  *
41  * \note
42  * Because \c : (colon) is used as field separator, it is not allowed to use
43  * it in wildmats.
44  *
45  * Rules with unknown type are ignored.
46  *
47  * \attention
48  * It is required that 'SSIZE_MAX' is at least 'INT_MAX' (must be checked by
49  * build system).
50  */
51 /*! @{ */
52 
53 
54 /* ========================================================================== */
55 /* Constants */
56 
/*! \brief Message prefix for FILTER module */
#define MAIN_ERR_PREFIX  "FILTER: "

/*! \brief Permissions for score file (owner read and owner write bits) */
#define FILTER_PERM \
   ((api_posix_mode_t) (API_POSIX_S_IRUSR | API_POSIX_S_IWUSR))

/*! \name Score limits
 *
 * Scores are stored with type \c int .
 * The smallest and largest usable values are \c INT_MIN and \c INT_MAX .
 */
/*! @{ */
#define FILTER_SCORE_MAX  INT_MAX
#define FILTER_SCORE_MIN  INT_MIN
/*! @} */
72 
73 
74 /* ========================================================================== */
75 /* Data types */
76 
/* Rule types of score entries
 *
 * The values are used as index for the table with the type names.
 * Therefore they must start with value 0 (unknown type) and must be
 * contiguous (this is guaranteed by the implicit numbering below).
 * Most used types should be defined first for better performance.
 */
enum filter_rule_type
{
   SCORE_TYPE_UNKNOWN = 0,
   /* ----------------------------------------------------------------------- */
   SCORE_TYPE_FROM,         /* 1: Literal string vs. 'From' */
   SCORE_TYPE_FROM_ERE,     /* 2: Extended regular expression vs. 'From' */
   SCORE_TYPE_SUBJECT,      /* 3: Literal string vs. 'Subject' */
   SCORE_TYPE_SUBJECT_ERE,  /* 4: Extended regular expression vs. 'Subject' */
   SCORE_TYPE_MSGID_ERE,    /* 5: Extended regular expr. vs. 'Message-ID' */
   SCORE_TYPE_GROUP,        /* 6: Literal string vs. element of 'Newsgroups' */
   /* ----------------------------------------------------------------------- */
   SCORE_END_OF_LIST        /* 7: Terminator (not a valid rule type) */
};
95 
#if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
/* Element of the linked list of patterns created from a group wildmat
 *
 * Every element represents one pattern of the wildmat as compiled ERE.
 */
struct filter_wm
{
   /* Nonzero: A match of this pattern excludes the group */
   int negate;
   /* Codeset that was selected when the ERE was compiled */
   enum filter_cs cs;
   /* Compiled ERE of the pattern */
   api_posix_regex_t* ere;
   /* Next pattern in list (NULL for the last element) */
   struct filter_wm* next;
};
#endif  /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
106 
107 /* Scoring rule */
108 struct filter
109 {
110  const char* group_wildmat;
111  enum filter_rule_type type;
112  int value;
113  const char* string;
114  int found;
115  struct filter* next;
116  enum filter_cs cs;
117 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
118  struct filter_wm* wm; /* Object for 'group_wildmat' */
119  api_posix_regex_t* ere; /* Extended regular expression for 'string' */
120 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
121 };
122 
123 
124 /* ========================================================================== */
125 /* Variables */
126 
/* Rule type names for the second field (type) of score file entries
 *
 * The table is read-only: Neither the strings nor the pointers to them are
 * allowed to be modified at runtime.
 *
 * \attention
 * The data type 'enum filter_rule_type' must be suitable as index!
 * The order of the names must match the order of the enumerators.
 */
static const char* const filter_type_name[] =
{
   "unknown",      /* SCORE_TYPE_UNKNOWN */
   /* ----------------------------------------------------------------------- */
   "from",         /* SCORE_TYPE_FROM */
   "from_ere",     /* SCORE_TYPE_FROM_ERE */
   "subject",      /* SCORE_TYPE_SUBJECT */
   "subject_ere",  /* SCORE_TYPE_SUBJECT_ERE */
   "msgid_ere",    /* SCORE_TYPE_MSGID_ERE */
   "group",        /* SCORE_TYPE_GROUP */
   /* ----------------------------------------------------------------------- */
   "eol"           /* SCORE_END_OF_LIST */
};
145 
146 static enum filter_cs filter_locale = FILTER_CS_ASCII;
147 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
148 static enum filter_cs testgroup_cs;
149 static api_posix_regex_t* testgroup_ere = NULL;
150 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
151 static size_t score_len_max = 1000;
152 static struct filter* scores = NULL;
153 static const char scorefile_name[] = "scorefile";
154 
155 
156 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
157 
158 
159 /* ========================================================================== */
160 /* Print error message if system failed to compile a regular expression
161  *
162  * \param[in] code Error code
163  * \param[in] ere Pointer to compiled ERE
164  *
165  * The value \e code must be nonzero and the last error code returned from
166  * \c regcomp() for \e ere according to the POSIX standard:
167  * <br>
168  * http://pubs.opengroup.org/onlinepubs/9699919799/functions/regcomp.html
169  *
170  * \attention
171  * The definition "last error code returned" is incomplete (may mean the last
172  * call from a thread or the last call from the whole process)
173  * => Always use the filter module from the same, single thread.
174  *
175  * \note
176  * The error message is always formatted using the POSIX locale.
177  */
178 
179 static void filter_print_ere_error(int code, api_posix_regex_t* ere)
180 {
181  const char* mod_name = MAIN_ERR_PREFIX;
182  size_t mod_len = strlen(mod_name);
183  size_t len;
184  char* buf = NULL;
185 
186  if(!code)
187  {
188  PRINT_ERROR("Can't process invalid error code");
189  }
190  else
191  {
192 # if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI
193  /* Don't use NLS for error messages on stderr */
194  api_posix_setlocale(API_POSIX_LC_MESSAGES, "POSIX");
195 # endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI */
196  len = api_posix_regerror(code, ere, buf, 0);
197  if(!len || API_POSIX_SIZE_MAX - mod_len < len)
198  {
199  PRINT_ERROR("Error message has invalid size");
200  }
201  else
202  {
203  buf = (char*) api_posix_malloc(mod_len + len);
204  if(NULL == buf)
205  {
206  PRINT_ERROR("Cannot allocate memory for error message");
207  }
208  else
209  {
210  memcpy(buf, mod_name, mod_len);
211  api_posix_regerror(code, ere, &buf[mod_len], len);
212  print_error(buf);
213  api_posix_free((void*) buf);
214  }
215  }
216 # if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI
217  api_posix_setlocale(API_POSIX_LC_MESSAGES, "");
218 # endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI */
219  }
220 }
221 
222 
223 /* ========================================================================== */
224 /* Compile ERE
225  *
226  * \param[out] cs Pointer to codeset of character classification locale
227  * \param[out] ere Pointer to compiled ERE
228  * \param[in] string Raw ERE pattern
229  *
230  * \note
231  * On success, the caller is responsible for freeing the ressources allocated
232  * for the object pointed to by \e ere .
233  */
234 
235 static int filter_compile_ere(enum filter_cs* cs, api_posix_regex_t** ere,
236  const char* string)
237 {
238  int res = 0;
239  int rv;
240  enum enc_mime_cs charset = ENC_CS_UNKNOWN;
241  const char* pat;
242  const char* p = NULL;
243 
244  *cs = filter_locale;
245  if(FILTER_CS_ISO8859_1 == filter_locale)
246  {
247  /* Convert string to ISO 8859-1 */
248  p = enc_convert_to_8bit(&charset, string, NULL);
249  if(NULL == p) { res = -1; }
250  else if(ENC_CS_ISO8859_1 != charset) { res = -1; }
251  }
252  else if(FILTER_CS_UTF_8 != filter_locale)
253  {
254  /* Treat unsupported codeset as ASCII */
255  *cs = FILTER_CS_ASCII;
256  res = enc_ascii_check(string);
257  }
258  if(res)
259  {
260  /* String cannot be process without UTF-8 locale */
261  PRINT_ERROR("ERE cannot be used with current locale");
262  }
263  else
264  {
265  /* Allocate memory */
266  *ere = (api_posix_regex_t*) api_posix_malloc(sizeof(api_posix_regex_t));
267  if(NULL == *ere)
268  {
269  PRINT_ERROR("Cannot allocate memory for regular expression");
270  res = -1;
271  }
272  else
273  {
274  /* Compile regular expression if required */
275  pat = string;
276  if(FILTER_CS_ISO8859_1 == filter_locale) { pat = p; }
277  rv = api_posix_regcomp(*ere, pat,
278  API_POSIX_REG_EXTENDED | API_POSIX_REG_NOSUB);
279  if(rv)
280  {
281  PRINT_ERROR("Compiling regular expression failed");
282  filter_print_ere_error(rv, *ere);
283  api_posix_free((void*) *ere);
284  res = -1;
285  }
286  else if(main_debug)
287  {
288  printf("%s: %sCompiling regular expression\n",
289  CFG_NAME, MAIN_ERR_PREFIX);
290  }
291  }
292  }
293  /* Release memory for ISO 8859-1 string */
294  if(NULL != p && string != p) { enc_free((void*) p); }
295 
296  return(res);
297 }
298 
299 
300 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
301 
302 
303 /* ========================================================================== */
304 /* Score rule destructor
305  *
306  * \param[in,out] rule Object created by \ref filter_score_rule_contructor()
307  */
308 
309 static void filter_score_rule_destructor(struct filter** rule)
310 {
311 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
312  struct filter_wm* p;
313  struct filter_wm* q;
314 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
315 
316  if(NULL != rule && NULL != *rule)
317  {
318  api_posix_free((void*) (*rule)->group_wildmat);
319  api_posix_free((void*) (*rule)->string);
320 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
321  /* Destroy wildmat linked pattern list */
322  p = (*rule)->wm;
323  while(NULL != p)
324  {
325  q = p->next;
326  api_posix_regfree(p->ere);
327  api_posix_free((void*) p->ere);
328  api_posix_free((void*) p);
329  p = q;
330  }
331  /* Destroy regular expression object for string ERE */
332  if(NULL != (*rule)->ere)
333  {
334  api_posix_regfree((*rule)->ere);
335  api_posix_free((void*) (*rule)->ere);
336  }
337 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
338  api_posix_free((void*) *rule);
339  *rule = NULL;
340  }
341 }
342 
343 
344 /* ========================================================================== */
345 /* Score rule constructor
346  *
347  * \param[out] new_rule Pointer to new rule
348  * \param[in] group_wildmat Limit rule scope to groups matching this wildmat
349  * \param[in] type Type of new rule
350  * \param[in] score Score if rule matches
351  * \param[in] string Literal string or regular expression
352  * \param[in] dcre Don't compile regular expressions if nonzero
353  *
354  * The parameter \e dcre should only be nonzero for exporting the rule data
355  * back to the scorefile (for comparing the raw regular expression strings).
356  *
357  * \attention
358  * The parameters \e group_wildmat and \e string must point to a memory block
359  * allocated with the function \ref api_posix_malloc() and will become part of
360  * the created object.
361  * On error the caller stay responsible to free the memory for \e wildmat and
362  * \e string .
363  *
364  * \note
365  * On success, the caller is responsible for freeing the ressources allocated
366  * for the object \e rule (use \ref filter_score_rule_destructor() function).
367  *
368  * On error \c NULL is written to \e new_rule .
369  */
370 
371 static int filter_score_rule_constructor(struct filter** new_rule,
372  const char* group_wildmat,
373  enum filter_rule_type type,
374  int score, const char* string,
375  int dcre)
376 {
377  int res = 0;
378 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
379  int rv;
380  struct enc_wm_pattern* wma;
381  struct filter_wm* pat;
382  int i;
383  struct filter_wm* last = NULL;
384 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
385 
386  /* Create new rule */
387  *new_rule = (struct filter*) api_posix_malloc(sizeof(struct filter));
388  if(NULL == *new_rule) { res = -1; }
389  else
390  {
391  (*new_rule)->group_wildmat = group_wildmat;
392  (*new_rule)->type = type;
393  (*new_rule)->value = score;
394  (*new_rule)->string = string;
395  (*new_rule)->found = 0;
396  (*new_rule)->next = NULL;
397  (*new_rule)->cs = FILTER_CS_UTF_8;
398  }
399 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
400  if(!res)
401  {
402  (*new_rule)->wm = NULL;
403  (*new_rule)->ere = NULL;
404 
405  /* Compile regular expressions for wildmat patterns */
406  if(!dcre)
407  {
408  if(strcmp("*", (*new_rule)->group_wildmat))
409  {
410  rv = enc_create_wildmat(&wma, group_wildmat);
411  if(0 < rv)
412  {
413  /* Process array backwards to get rightmost pattern first */
414  for(i = rv; i; --i)
415  {
416  pat = (struct filter_wm*)
417  api_posix_malloc(sizeof(struct filter_wm));
418  if(NULL == pat) { res = -1; break; }
419  else
420  {
421  pat->negate = wma[i - 1].negate;
422  pat->cs = FILTER_CS_UTF_8;
423  res = filter_compile_ere(&pat->cs, &pat->ere,
424  wma[i - 1].ere);
425  pat->next = NULL;
426  /* Link list */
427  if(NULL == last) { (*new_rule)->wm = pat; }
428  else { last->next = pat; }
429  last = pat;
430  }
431  if(res) { break; }
432  }
433  enc_destroy_wildmat(&wma, rv);
434  }
435  }
436  }
437 
438  /* Compile regular expression for string */
439  if(!res && !dcre)
440  {
441  switch((*new_rule)->type)
442  {
443  case SCORE_TYPE_FROM_ERE:
444  case SCORE_TYPE_SUBJECT_ERE:
445  case SCORE_TYPE_MSGID_ERE:
446  {
447  res = filter_compile_ere(&(*new_rule)->cs, &(*new_rule)->ere,
448  string);
449  break;
450  }
451  default:
452  {
453  /* Rule do not use a regular expression */
454  break;
455  }
456  }
457  }
458 
459  /* Check for error */
460  if(res)
461  {
462  PRINT_ERROR("Creating score rule failed");
463  /* Mask strings to prevent double free */
464  (*new_rule)->group_wildmat = NULL;
465  (*new_rule)->string = NULL;
466  filter_score_rule_destructor(new_rule);
467  }
468  }
469 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
470 
471  return(res);
472 }
473 
474 
475 /* ========================================================================== */
476 /* Calculate and clamp new score
477  *
478  * \param[in] val Old value
479  * \param[in] val Difference to old value that should be aplied
480  *
481  * \return
482  * - Updated score
483  */
484 
485 static int filter_score_add(int val, int diff)
486 {
487  /* Check for increase */
488  if(0 < diff)
489  {
490  if(0 < val)
491  {
492  /* Clamp to upper limit */
493  if(FILTER_SCORE_MAX - val < diff) { val = FILTER_SCORE_MAX; }
494  else { val += diff; }
495  }
496  else { val += diff; }
497  }
498 
499  /* Check for decrease */
500  if(0 > diff)
501  {
502  if(0 > val)
503  {
504  /* Clamp to lower limit */
505  if(FILTER_SCORE_MIN - val > diff) { val = FILTER_SCORE_MIN; }
506  else { val += diff; }
507  }
508  else { val += diff; }
509  }
510 
511  return(val);
512 }
513 
514 
515 /* ========================================================================== */
516 /* Add score rule
517  *
518  * \param[in] new_rule Rule to add
519  */
520 
521 static void filter_add_score_rule(struct filter* new_rule)
522 {
523  struct filter* last_rule = scores;
524 
525  if(NULL == last_rule) { scores = new_rule; }
526  else
527  {
528  /* Append rule to end of list */
529  while(NULL != last_rule->next) { last_rule = last_rule->next; }
530  last_rule->next = new_rule;
531  }
532 }
533 
534 
535 /* ========================================================================== */
536 /* Delete scoring rules */
537 
538 static void filter_delete_score_rules(void)
539 {
540  struct filter* rule = scores;
541  struct filter* next_rule;
542 
543  while(NULL != rule)
544  {
545  next_rule = rule->next;
546  filter_score_rule_destructor(&rule);
547  rule = next_rule;
548  }
549  scores = NULL;
550 }
551 
552 
553 /* ========================================================================== */
554 /* Decode score rule from score file
555  *
556  * \param[out] rule Pointer to decoded rule object
557  * \param[in] line Line from score file
558  * \param[in] len Line buffer size
559  * \param[in] dcre Dont't compile potential regular expressions if nonzero
560  *
561  * On error, \c NULL is written to \e rule .
562  *
563  * \note
564  * On success, the caller is responsible for freeing the ressources allocated
565  * for the object \e rule (use \ref filter_score_rule_destructor() function).
566  */
567 
568 static int filter_decode_rule(struct filter** rule,
569  const char* line, size_t len, int dcre)
570 {
571  int res = -1;
572  const char* p;
573  char* q;
574  enum filter_rule_type type;
575  size_t start = 0;
576  int score = 0;
577  char* group_wildmat = NULL;
578  size_t wm_len;
579  char* string = NULL;
580  int rv;
581  int error = 0;
582 
583  /* Init rule pointer to defined value */
584  *rule = NULL;
585 
586  /* Since version 1.0 all rules have a wildmat as first field */
587  p = strchr(line, (int) ':');
588  if(NULL == p)
589  {
590  PRINT_ERROR("Malformed rule in score file");
591  }
592  else
593  {
594  error = 1;
595  wm_len = (size_t) (p - line);
596  q = (char*) api_posix_realloc((void*) group_wildmat, wm_len + (size_t) 1);
597  if(NULL == q)
598  {
599  PRINT_ERROR("Cannot allocate memory for wildmat");
600  }
601  else
602  {
603  group_wildmat = q;
604  strncpy(group_wildmat, line, wm_len);
605  group_wildmat[wm_len] = 0;
606  line += wm_len + (size_t) 1;
607  error = 0;
608  }
609  }
610 
611  /* Get type */
612  type = SCORE_TYPE_UNKNOWN;
613  if(!error)
614  {
615  while(SCORE_END_OF_LIST != ++type)
616  {
617  p = filter_type_name[type];
618  start = strlen(p);
619  if(start < len && !strncmp(line, p, start))
620  {
621  /* Verify that not only the first part has matched */
622  if(':' == line[start]) { break; }
623  }
624  }
625  if(SCORE_END_OF_LIST == type) { type = SCORE_TYPE_UNKNOWN; }
626  else if(main_debug)
627  {
628  printf("%s: %sScore rule type: %s\n", CFG_NAME, MAIN_ERR_PREFIX, p);
629  }
630  }
631 
632  /* Extract string from data field */
633  if(!error)
634  {
635  string = (char*) api_posix_malloc((size_t) len);
636  if(NULL == string)
637  {
638  PRINT_ERROR("Cannot allocate memory for score rule parser");
639  }
640  else
641  {
642  /* Decode data */
643  switch(type)
644  {
645  case SCORE_TYPE_FROM:
646  case SCORE_TYPE_FROM_ERE:
647  case SCORE_TYPE_SUBJECT:
648  case SCORE_TYPE_SUBJECT_ERE:
649  case SCORE_TYPE_MSGID_ERE:
650  case SCORE_TYPE_GROUP:
651  {
652  rv = sscanf(&line[start], ":%d:%[^\n]", &score, string);
653  if(2 != rv)
654  {
655  PRINT_ERROR("Invalid rule in score file");
656  }
657  else
658  {
659  /* Success */
660  res = 0;
661  }
662  break;
663  }
664  default:
665  {
666  PRINT_ERROR("Unknown rule type in score file");
667  break;
668  }
669  }
670  }
671  }
672 
673  /* Create score rule object */
674  if(!res)
675  {
676 #if 0
677  /* For debugging */
678  printf("=============================\n");
679  printf(" Groups: %s\n", group_wildmat);
680  printf(" Type : %s\n", filter_type_name[type]);
681  printf(" Score : %d\n", score);
682  printf(" String: %s\n", string);
683 #endif
684  res = filter_score_rule_constructor(rule, group_wildmat,
685  type, score, string, dcre);
686  }
687 
688  /* Release memory on error */
689  if(res)
690  {
691  api_posix_free((void*) group_wildmat);
692  api_posix_free((void*) string);
693  }
694 
695  /*
696  * For code review:
697  * The allocated memory blocks becomes part of the new score rule object!
698  */
699 
700  return(res);
701 }
702 
703 
704 /* ========================================================================== */
705 /* Encode score rule for score file
706  *
707  * \param[in,out] line Pointer to line for score file
708  * \param[in,out] len Pointer to line buffer size
709  * \param[in] rule Rule to encode
710  *
711  * \attention
712  * The pointer \e line must be \c NULL or point to a dynamically allocated
713  * buffer.
714  */
715 
716 static int filter_encode_rule(char** line, size_t* len, struct filter* rule)
717 {
718  const char* frt = NULL; /* Filter Rule Type */
719  int res = -1;
720  int rv;
721  char* p = NULL;
722  size_t l = 0;
723 
724  /* Create new score rule */
725  switch(rule->type)
726  {
727  case SCORE_TYPE_FROM:
728  case SCORE_TYPE_FROM_ERE:
729  case SCORE_TYPE_SUBJECT:
730  case SCORE_TYPE_SUBJECT_ERE:
731  case SCORE_TYPE_MSGID_ERE:
732  case SCORE_TYPE_GROUP:
733  {
734  frt = filter_type_name[rule->type];
735  l += strlen(rule->group_wildmat);
736  l += (size_t) 1; /* Field separator */
737  l += strlen(frt); /* Type ID */
738  l += (size_t) 1; /* Field separator */
739  l += score_len_max; /* Score value */
740  l += (size_t) 1; /* Field separator */
741  l += strlen(rule->string);
742  l += (size_t) 1; /* LF line termination */
743  l += (size_t) 1; /* NUL termination */
744  p = (char*) api_posix_malloc(l);
745  if(NULL == p)
746  {
747  PRINT_ERROR("Cannot allocate memory for score rule");
748  }
749  else
750  {
751  /* Since version 1.0 all rules use a wildmat */
752  rv = api_posix_snprintf(p, l, "%s:%s:%d:%s\n", rule->group_wildmat,
753  frt, rule->value, rule->string);
754  if(0 > rv || (size_t) rv >= l)
755  {
756  PRINT_ERROR("Encoding score rule failed");
757  api_posix_free((void*) p);
758  }
759  else
760  {
761  /* Success => Replace line buffer */
762  api_posix_free((void*) *line);
763  *line = p;
764  *len = l;
765  res = 0;
766  }
767  }
768  break;
769  }
770  default:
771  {
772  PRINT_ERROR("Encoding unknown rule type failed");
773  break;
774  }
775  }
776 
777  return(res);
778 }
779 
780 
781 /* ========================================================================== */
782 /* Check whether score rule match
783  *
784  * \param[in] line Line from score file
785  * \param[in] len Line buffer size
786  * \param[in] rule Rule to match
787  */
788 
789 static int filter_check_rule(char* line, size_t len, struct filter* rule)
790 {
791  int res = -1;
792  struct filter* current_rule;
793 
794  /* Decode line */
795  if('#' != line[0] && '\n' != line[0])
796  {
797  /* Set 'dcre' flag, we only want to compare the strings of the rule */
798  if(!filter_decode_rule(&current_rule, line, len, 1))
799  {
800  /* Check whether score has changed */
801  if(current_rule->type == rule->type)
802  {
803  if(!strcmp(current_rule->string, rule->string))
804  {
805  /* Match detected */
806  res = 0;
807  }
808  }
809  /* Destroy current rule object */
810  filter_score_rule_destructor(&current_rule);
811  }
812  }
813 
814  return(res);
815 }
816 
817 
818 /* ========================================================================== */
819 /* Export score rules
820  *
821  * \param[in] fs Stream corresponding to old configuration
822  * \param[in] fs_tmp Stream corresponding to new configuration
823  *
824  * The current data in memory is merged with the data from \e fs and written
825  * to \e fs_tmp .
826  */
827 
828 static int filter_export_score_rules(FILE* fs, FILE* fs_tmp)
829 {
830  int res = -1;
831  char* line = NULL;
832  size_t len = 0;
833  api_posix_ssize_t readlen;
834  int rv;
835  struct filter* rule;
836 
837  if(main_debug)
838  {
839  printf("%s: %sStore scoring rules\n", CFG_NAME, MAIN_ERR_PREFIX);
840  }
841 
842  while(1)
843  {
844  /* Read line */
845  readlen = api_posix_getline(&line, &len, fs);
846  if(-1 == readlen)
847  {
848  if(API_POSIX_ENOMEM == api_posix_errno)
849  {
850  PRINT_ERROR("Cannot assign memory for score file parser");
851  }
852  else
853  {
854  /* Check for error */
855  if(ferror(fs))
856  {
857  PRINT_ERROR("Parse error in score file");
858  }
859  /* Check for EOF */
860  else if(feof(fs))
861  {
862  res = 0;
863  }
864  }
865  }
866  if(0 >= readlen) { break; }
867  else
868  {
869  /* Update data */
870  rule = scores;
871  while(NULL != rule)
872  {
873  if(!rule->found && !filter_check_rule(line, len, rule))
874  {
875  /* Match => Update */
876  rule->found = 1;
877  filter_encode_rule(&line, &len, rule);
878  break;
879  }
880  rule = rule->next;
881  }
882 
883  /* Write line to new config file */
884  rv = fprintf(fs_tmp, "%s", line);
885  if(0 > rv) { break; }
886  }
887  }
888 
889  /* Add missing entries to end of config file */
890  if(!res)
891  {
892  rule = scores;
893  while(NULL != rule)
894  {
895  if(!rule->found)
896  {
897  rv = filter_encode_rule(&line, &len, rule);
898  if(rv) { res = -1; break; }
899  else
900  {
901  /* Write new rule */
902  rv = fprintf(fs_tmp, "%s", line);
903  if(0 > rv)
904  {
905  res = -1;
906  break;
907  }
908  }
909  }
910  rule = rule->next;
911  }
912  }
913 
914  /* Release memory for line buffer */
915  api_posix_free((void*) line);
916 
917  return(res);
918 }
919 
920 
921 /* ========================================================================== */
922 /* Get scorefile pathname
923  *
924  * This function must be thread safe.
925  * The caller is responsible to free the memory for the buffer on success.
926  */
927 
928 static int filter_get_pathname(const char** pathname, const char* filename)
929 {
930  int res = -1;
931  int rv;
932 
933  *pathname = xdg_get_confdir(CFG_NAME);
934  if(NULL != *pathname)
935  {
936  rv = fu_create_path(*pathname, (api_posix_mode_t) API_POSIX_S_IRWXU);
937  if(0 == rv)
938  {
939  /* Store scorefile pathname */
940  rv = xdg_append_to_path(pathname, filename);
941  if(0 == rv)
942  {
943  res = 0;
944  }
945  }
946  }
947 
948  /* Free memory on error */
949  if(0 != res)
950  {
951  PRINT_ERROR("Cannot create score file pathname");
952  api_posix_free((void*) *pathname);
953  *pathname = NULL;
954  }
955 
956  return(res);
957 }
958 
959 
960 /* ========================================================================== */
961 /* Check whether a group in the list matches a wildmat
962  *
963  * Returns success (zero) if one of the groups in \e grouplist matches
964  * \e wildmat .
965  */
966 
967 static int filter_group_check(struct filter* rule, const char** grouplist)
968 {
969  int res = -1;
970  size_t i;
971  const char* group;
972 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
973  struct filter_wm* plp = rule->wm;
974  int rv;
975  const char* string;
976  const char* p = NULL;
977  enum enc_mime_cs charset = ENC_CS_UNKNOWN;
978 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
979 
980  i = 0;
981  /* Assignment in truth expression is intended! */
982  while(NULL != (group = grouplist[i++]))
983  {
984  /* "Match all" must always work */
985  if(!strcmp("*", rule->group_wildmat)) { res = 0; break; }
986  /* Check for literal match (usable for all locales) */
987  if(!strcmp(rule->group_wildmat, group)) { res = 0; break; }
988 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
989  /* Check with regular expressions created from group wildmat */
990  while(NULL != plp)
991  {
992  rv = 0;
993  string = group;
994  if(FILTER_CS_ASCII == plp->cs)
995  {
996  rv = enc_ascii_check(string);
997  }
998  else if(FILTER_CS_ISO8859_1 == plp->cs)
999  {
1000  /* Try to convert data to ISO 8859-1 */
1001  p = enc_convert_to_8bit(&charset, string, NULL);
1002  if(NULL == p) { rv = -1; }
1003  else if(ENC_CS_ISO8859_1 != charset) { rv = -1; }
1004  else { string = p; }
1005  }
1006  if(!rv && !api_posix_regexec(plp->ere, string, 0, NULL, 0))
1007  {
1008  res = 0;
1009  }
1010  /* Release memory for ISO 8859-1 string */
1011  if(NULL != p && group != p) { enc_free((void*) p); }
1012  /* Check for ERE match */
1013  if(!res)
1014  {
1015  /* printf("ERE of wildmat pattern matched\n"); */
1016  if(plp->negate) { res = -1; }
1017  break;
1018  }
1019  /* Next pattern in list */
1020  plp = plp->next;
1021  }
1022 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
1023  }
1024 
1025  return(res);
1026 }
1027 
1028 
1029 /* ========================================================================== */
1030 /*! \brief Initialize filter module
1031  *
1032  * \param[in] utf8 Flag indicating that the locale use UTF-8 encoding
1033  *
1034  * \attention
1035  * Remember that the locale must use either UTF-8 or ISO 8859-1 codeset or be
1036  * the POSIX locale.
1037  *
1038  * Step1 (only if \c CONF_SCORERC is configured):
1039  * - Rename current \c scorefile to \c scorefile.old
1040  * - Copy pathname configured with \c CONF_SCORERC to \c scorefile
1041  *
1042  * Step 2:
1043  * - Open and lock scorefile
1044  * - Load rules from scorefile to memory
1045  *
1046  * \return
1047  * - 0 on success
1048  * - Negative value on error
1049  */
1050 
1051 int filter_init(int utf8)
1052 {
1053  int res = -1;
1054  const char* scorerc = config[CONF_SCORERC].val.s;
1055  const char* scorepathname = NULL;
1056  char* oldscorepathname = NULL;
1057  int rv;
1058  api_posix_struct_stat state;
1059  int fd = -1;
1060  FILE* fs = NULL;
1061  char* data = NULL;
1062  size_t len;
1063  char* line = NULL;
1064  api_posix_ssize_t readlen;
1065  struct filter* rule;
1066 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI
1067  const char* loc_ctype;
1068 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI */
1069 
1070 #if !(CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB)
1071  PRINT_ERROR("Regular expression support disabled by configuration");
1072 #endif /* !(CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB) */
1073 
1074  /* Set locale and check codeset */
1075  filter_locale = FILTER_CS_ASCII;
1076 #if !(CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI)
1077 # if CFG_USE_CLB
1078  PRINT_ERROR("Cannot set locale due to configuration");
1079 # endif /* CFG_USE_CLB */
1080  printf("%s: %sCooked character classification codeset: "
1081  "US-ASCII\n", CFG_NAME, MAIN_ERR_PREFIX);
1082 #else /* !(CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI) */
1083  loc_ctype = api_posix_setlocale(API_POSIX_LC_CTYPE, "");
1084  if(NULL == loc_ctype)
1085  {
1086  PRINT_ERROR("Setting locale for category 'LC_CTYPE' failed");
1087  return(-1);
1088  }
1089  else
1090  {
1091  printf("%s: %sCharacter classification locale: %s\n",
1092  CFG_NAME, MAIN_ERR_PREFIX, loc_ctype);
1093  if(utf8)
1094  {
1095  printf("%s: %sCooked character classification codeset: "
1096  "UTF-8\n", CFG_NAME, MAIN_ERR_PREFIX);
1097  filter_locale = FILTER_CS_UTF_8;
1098  }
1099  else
1100  {
1101 # if CFG_USE_XSI
1102  loc_ctype = api_posix_nl_langinfo(CODESET);
1103 # endif /* CFG_USE_XSI */
1104  /* Check whether fallback to ISO 8859-1 is possible */
1105  if( NULL != strstr(loc_ctype, "8859-1")
1106  || NULL != strstr(loc_ctype, "8859_1")
1107  || NULL != strstr(loc_ctype, "88591") )
1108  {
1109  /* Verify that it is not something like "8859-15" */
1110  if('1' == loc_ctype[strlen(loc_ctype) - (size_t) 1])
1111  {
1112  printf("%s: %sCooked character classification codeset: "
1113  "ISO-8859-1\n", CFG_NAME, MAIN_ERR_PREFIX);
1114  filter_locale = FILTER_CS_ISO8859_1;
1115  }
1116  else
1117  {
1118  PRINT_ERROR("Codeset of locale not supported");
1119  PRINT_ERROR("Supported codesets: US-ASCII, ISO-8859-1, UTF-8");
1120  PRINT_ERROR("(Use \"locale -a\" to find a locale)");
1121  return(-1);
1122  }
1123  }
1124  else
1125  {
1126  if( !strcmp(loc_ctype, "POSIX")
1127  || !strcmp(loc_ctype, "C")
1128  || NULL != strstr(loc_ctype, "ASCII")
1129  || NULL != strstr(loc_ctype, "X3.4") )
1130  {
1131  printf("%s: %sCooked character classification codeset: "
1132  "US-ASCII\n", CFG_NAME, MAIN_ERR_PREFIX);
1133  }
1134  else
1135  {
1136  PRINT_ERROR("Codeset of locale not supported");
1137  PRINT_ERROR("Supported codesets: US-ASCII, ISO-8859-1, UTF-8");
1138  PRINT_ERROR("(Use \"locale -a\" to find a locale)");
1139  return(-1);
1140  }
1141  }
1142  }
1143  }
1144 #endif /* !(CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI) */
1145 
1146  /*
1147  * Calculate maximum length of score value strings
1148  *
1149  * Note: Use of snprintf() must no longer be SUS Version 2 compatible.
1150  * C99/POSIX.1-2001/SUSv3 semantics are now provided by the POSIX module.
1151  */
1152  rv = api_posix_snprintf(NULL, 0, "%d", FILTER_SCORE_MAX);
1153  if(0 <= rv)
1154  {
1155  score_len_max = (size_t) rv;
1156  rv = api_posix_snprintf(NULL, 0, "%d", FILTER_SCORE_MIN);
1157  if(0 <= rv)
1158  {
1159  if((size_t) rv > score_len_max) { score_len_max = (size_t) rv; }
1160  res = 0;
1161  }
1162  }
1163  if(res)
1164  {
1165  PRINT_ERROR("Calculation of maximum score string length failed");
1166  return(res);
1167  }
1168 
1169  /* Step 1 */
1170  if(strlen(scorerc))
1171  {
1172  if(main_debug)
1173  {
1174  printf("%s: %sImport external scorerc: %s\n",
1175  CFG_NAME, MAIN_ERR_PREFIX, scorerc);
1176  }
1177  rv = api_posix_stat(scorerc, &state);
1178  if(rv) { PRINT_ERROR("Cannot stat scorerc file"); }
1179  else if(API_POSIX_S_ISREG(state.st_mode))
1180  {
1181  rv = filter_get_pathname(&scorepathname, scorefile_name);
1182  if(!rv)
1183  {
1184  /* Read scorerc file */
1185  rv = fu_open_file(scorerc, &fd, API_POSIX_O_RDWR,
1186  (api_posix_mode_t) 0);
1187  if(!rv)
1188  {
1189  rv = fu_lock_file(fd);
1190  if(!rv)
1191  {
1192  rv = fu_read_whole_file(fd, &data, &len);
1193  }
1194  fu_close_file(&fd, NULL);
1195  if(!rv)
1196  {
1197  oldscorepathname =
1198  api_posix_malloc(strlen(scorepathname) + (size_t) 5);
1199  if(NULL == oldscorepathname)
1200  {
1201  PRINT_ERROR("Cannot allocate memory for pathname");
1202  }
1203  else
1204  {
1205  strcpy(oldscorepathname, scorepathname);
1206  strcat(oldscorepathname, ".old");
1207  rv = api_posix_rename(scorepathname, oldscorepathname);
1208  if(rv)
1209  {
1210  PRINT_ERROR("Renaming score file failed");
1211  }
1212  rv = fu_open_file(scorepathname, &fd,
1213  API_POSIX_O_WRONLY | API_POSIX_O_CREAT,
1214  FILTER_PERM);
1215  if(!rv)
1216  {
1217  rv = fu_lock_file(fd);
1218  if(!rv)
1219  {
1220  len = strlen(data);
1221  rv = fu_write_to_filedesc(fd, data, len);
1222  }
1223  fu_close_file(&fd, NULL);
1224  }
1225  }
1226  }
1227  }
1228  }
1229  }
1230  if(rv)
1231  {
1232  PRINT_ERROR("Importing scorerc failed, using local scorefile");
1233  }
1234  }
1235  /* Release memory */
1236  api_posix_free((void*) data);
1237  api_posix_free((void*) oldscorepathname);
1238  api_posix_free((void*) scorepathname);
1239  scorepathname = NULL;
1240 
1241  /* Step 2 */
1242  rv = filter_get_pathname(&scorepathname, scorefile_name);
1243  if(!rv)
1244  {
1245  rv = api_posix_stat(scorepathname, &state);
1246  if(rv) { PRINT_ERROR("Cannot stat score file"); }
1247  else if(API_POSIX_S_ISREG(state.st_mode))
1248  {
1249  rv = fu_open_file(scorepathname, &fd, API_POSIX_O_RDWR,
1250  (api_posix_mode_t) 0);
1251  if(!rv)
1252  {
1253  rv = fu_lock_file(fd);
1254  if(!rv)
1255  {
1256  rv = fu_assign_stream(fd, &fs, "r");
1257  if(!rv)
1258  {
1259  /* Load scoring rules */
1260  if(main_debug)
1261  {
1262  printf("%s: %sLoad scoring rules from: %s\n",
1263  CFG_NAME, MAIN_ERR_PREFIX, scorepathname);
1264  }
1265  while(1)
1266  {
1267  /* Read line */
1268  readlen = api_posix_getline(&line, &len, fs);
1269  if(-1 == readlen)
1270  {
1271  if(API_POSIX_ENOMEM == api_posix_errno)
1272  {
1273  PRINT_ERROR("Cannot allocate memory for score "
1274  "file parser");
1275  }
1276  else
1277  {
1278  /* Check for error */
1279  if(ferror(fs))
1280  {
1281  PRINT_ERROR("Parse error in score file");
1282  }
1283  }
1284  }
1285  if(0 >= readlen) { break; }
1286  else
1287  {
1288  /* Extract data */
1289  if('#' == line[0] || '\n' == line[0]) { continue; }
1290  rv = filter_decode_rule(&rule, line, (size_t) readlen,
1291  0);
1292  if(!rv) { filter_add_score_rule(rule); }
1293  }
1294  }
1295  api_posix_free((void*) line);
1296  rv = 0;
1297  }
1298  }
1299  }
1300  fu_close_file(&fd, &fs);
1301  }
1302  if(rv) { PRINT_ERROR("Importing rules from score file failed"); }
1303  }
1304  /* Release memory */
1305  api_posix_free((void*) scorepathname);
1306 
1307 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
1308  /* Initialize test group checking facility */
1309  if(!strlen(config[CONF_TESTGRP_ERE].val.s))
1310  {
1311  PRINT_ERROR("No ERE for test group matching found in config file");
1312  rv = -1;
1313  }
1314  else
1315  {
1316  /* Compile testgroup ERE */
1317  printf("%s: %sEnabling test group checking facility\n",
1318  CFG_NAME, MAIN_ERR_PREFIX);
1319  rv = filter_compile_ere(&testgroup_cs, &testgroup_ere,
1320  config[CONF_TESTGRP_ERE].val.s);
1321  }
1322  if(rv) { testgroup_ere = NULL; }
1323 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
1324 
1325  return(res);
1326 }
1327 
1328 
1329 /* ========================================================================== */
1330 /*! \brief Shutdown filter module
1331  *
1332  * Step 1:
1333  * - Open and lock scorefile
1334  * - Save scoring rules in memory to score file
1335  * - Delete scoring rules from memory
1336  *
1337  * Step 2 (only if \c CONF_SCORERC is configured):
1338  * - Copy \c scorefile to the location configured with \c CONF_SCORERC
1339  */
1340 
1341 void filter_exit(void)
1342 {
1343  const char* scorerc = config[CONF_SCORERC].val.s;
1344  const char* scorepathname = NULL;
1345  char* tmppathname = NULL;
1346  int rv;
1347  int fd = -1;
1348  FILE* fs = NULL;
1349  int fd_tmp = -1;
1350  FILE* fs_tmp = NULL;
1351  char* data = NULL;
1352  size_t len;
1353  api_posix_struct_stat state;
1354  char* p;
1355 
1356  /* Step 1 */
1357  rv = filter_get_pathname(&scorepathname, scorefile_name);
1358  if(!rv)
1359  {
1360  /* Create scorefile if it does not exist */
1361  rv = api_posix_stat(scorepathname, &state);
1362  if(rv && API_POSIX_ENOENT == api_posix_errno)
1363  {
1364  fu_open_file(scorepathname, &fd_tmp,
1365  API_POSIX_O_WRONLY | API_POSIX_O_CREAT, FILTER_PERM);
1366  fu_close_file(&fd_tmp, NULL);
1367  }
1368  /* Open scorefile */
1369  rv = api_posix_stat(scorepathname, &state);
1370  if(rv) { PRINT_ERROR("Cannot stat score file"); }
1371  else if(API_POSIX_S_ISREG(state.st_mode))
1372  {
1373  rv = fu_open_file(scorepathname, &fd, API_POSIX_O_RDWR,
1374  (api_posix_mode_t) 0);
1375  if(!rv)
1376  {
1377  rv = fu_lock_file(fd);
1378  if(!rv)
1379  {
1380  rv = fu_assign_stream(fd, &fs, "r");
1381  if(!rv)
1382  {
1383  /* Open temporary file */
1384  tmppathname = api_posix_malloc(strlen(scorepathname)
1385  + (size_t) 5);
1386  if(NULL == tmppathname)
1387  {
1388  PRINT_ERROR("Cannot allocate memory for pathname");
1389  }
1390  else
1391  {
1392  strcpy(tmppathname, scorepathname);
1393  strcat(tmppathname, ".new");
1394  rv = fu_open_file(tmppathname, &fd_tmp, API_POSIX_O_WRONLY
1395  | API_POSIX_O_CREAT | API_POSIX_O_TRUNC,
1396  FILTER_PERM);
1397  /*
1398  * Because we have the lock for the score file, it is
1399  * allowed to assume that no other instance of the program
1400  * currently use the temporary filename.
1401  */
1402  if(!rv)
1403  {
1404  rv = fu_assign_stream(fd_tmp, &fs_tmp, "w");
1405  if(!rv)
1406  {
1407  rv = filter_export_score_rules(fs, fs_tmp);
1408  }
1409  /* Flush stream of temporary file*/
1410  if (!rv) { rv = fu_sync(fd_tmp, fs_tmp); }
1411  /* Rename temporary file to score file */
1412  if(!rv)
1413  {
1414  rv = api_posix_rename(tmppathname, scorepathname);
1415  }
1416  if(rv)
1417  {
1418  if(tmppathname)
1419  {
1420  (void) fu_unlink_file(tmppathname);
1421  }
1422  }
1423  }
1424  fu_close_file(&fd_tmp, &fs_tmp);
1425  }
1426  }
1427  }
1428  }
1429  fu_close_file(&fd, &fs);
1430  }
1431  if(rv) { PRINT_ERROR("Exporting rules to score file failed"); }
1432  }
1433  filter_delete_score_rules();
1434 
1435  /* Step 2 */
1436  if(!rv && strlen(scorerc))
1437  {
1438  /* Read scorefile */
1439  rv = fu_open_file(scorepathname, &fd, API_POSIX_O_RDWR,
1440  (api_posix_mode_t) 0);
1441  if(!rv)
1442  {
1443  rv = fu_lock_file(fd);
1444  if(!rv)
1445  {
1446  rv = fu_read_whole_file(fd, &data, &len);
1447  }
1448  fu_close_file(&fd, NULL);
1449  if(!rv)
1450  {
1451  /* Write scorerc file */
1452  if(main_debug)
1453  {
1454  printf("%s: %sExport to external scorerc: %s\n",
1455  CFG_NAME, MAIN_ERR_PREFIX, scorerc);
1456  }
1457  p = api_posix_realloc(tmppathname, strlen(scorerc) + (size_t) 5);
1458  if(NULL == p)
1459  {
1460  PRINT_ERROR("Cannot allocate memory for pathname");
1461  }
1462  else
1463  {
1464  tmppathname = p;
1465  strcpy(tmppathname, scorerc);
1466  strcat(tmppathname, ".new");
1467  rv = fu_open_file(tmppathname, &fd,
1468  API_POSIX_O_WRONLY | API_POSIX_O_CREAT,
1469  FILTER_PERM);
1470  if(!rv)
1471  {
1472  rv = fu_lock_file(fd);
1473  if(!rv)
1474  {
1475  len = strlen(data);
1476  rv = fu_write_to_filedesc(fd, data, len);
1477  if(rv) { rv = fu_sync(fd, NULL); }
1478  if(rv)
1479  {
1480  PRINT_ERROR("Writing data to scorerc file failed");
1481  }
1482  else
1483  {
1484  rv = api_posix_rename(tmppathname, scorerc);
1485  if(rv)
1486  {
1487  PRINT_ERROR("Renaming new scorerc file failed");
1488  }
1489  }
1490  }
1491  fu_close_file(&fd, NULL);
1492  }
1493  }
1494  }
1495  }
1496  if(rv)
1497  {
1498  PRINT_ERROR("Exporting score file data to scorerc failed");
1499  }
1500  }
1501 
1502  /* Release memory */
1503  api_posix_free((void*) data);
1504  api_posix_free((void*) tmppathname);
1505  api_posix_free((void*) scorepathname);
1506 
1507 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
1508  /* Destroy testgroup ERE */
1509  if(NULL != testgroup_ere)
1510  {
1511  /* Destroy regular expression object */
1512  api_posix_regfree(testgroup_ere);
1513  api_posix_free((void*) testgroup_ere);
1514  testgroup_ere = NULL;
1515  }
1516 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
1517 
1518  /* Clear locale configuration */
1519  filter_locale = FILTER_CS_ASCII;
1520 }
1521 
1522 
1523 /* ========================================================================== */
/*! \brief Check for test group
 *
 * \param[in] group  Single newsgroup name (not list)
 *
 * The name is matched against the test group ERE from the configuration.
 * Depending on the codeset that was stored for the compiled ERE, \e group
 * is verified to be ASCII or converted to ISO 8859-1 before matching.
 * Without regular expression support the result is always 0.
 *
 * \return
 * - 1 if \e group is a test group
 * - 0 otherwise (including all error conditions)
 */

int filter_check_testgroup(const char* group)
{
   int  res = 0;
#if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
   const char*  subject = group;     /* String passed to the matcher */
   const char*  converted = NULL;    /* Result of ISO 8859-1 conversion */
   enum enc_mime_cs  cs = ENC_CS_UNKNOWN;
   int  unsupported = 0;

   /* Without compiled testgroup ERE there is nothing to match against */
   if(NULL == testgroup_ere)
   {
      PRINT_ERROR("Test group check failed (ERE not compiled)");
      return(res);
   }

   /* Prepare group name for the codeset of the ERE */
   switch(testgroup_cs)
   {
      case FILTER_CS_ASCII:
      {
         unsupported = enc_ascii_check(group);
         break;
      }
      case FILTER_CS_ISO8859_1:
      {
         /* Try to convert data to ISO 8859-1 */
         converted = enc_convert_to_8bit(&cs, group, NULL);
         if(NULL != converted && ENC_CS_ISO8859_1 == cs)
         {
            subject = converted;
         }
         else  { unsupported = -1; }
         break;
      }
      default:
      {
         /* Other codesets: Use group name unchanged */
         break;
      }
   }

   if(unsupported)
   {
      PRINT_ERROR("Test group name cannot be checked with current locale");
   }
   else if(0 == api_posix_regexec(testgroup_ere, subject, 0, NULL, 0))
   {
      printf("%s: %sTest group detected\n", CFG_NAME, MAIN_ERR_PREFIX);
      res = 1;
   }

   /* Release memory for ISO 8859-1 string (if one was returned) */
   if(NULL != converted && converted != group)
   {
      enc_free((void*) converted);
   }
#endif  /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */

   return(res);
}
1581 
1582 
1583 /* ========================================================================== */
1584 /*! \brief Check for own article
1585  *
1586  * \param[in] he Pointer to article hierarchy element
1587  *
1588  * The identity configuration is used as reference for matching.
1589  *
1590  * \return
1591  * - 1 if \e he corresponds to own article
1592  * - 0 otherwise
1593  */
1594 
1596 {
1597  int res = 0;
1598 
1599  if(NULL != he->header)
1600  {
1601  /*
1602  * Note 1:
1603  * The element 'from' (corresponding to the mandatory header field "From")
1604  * is never 'NULL'. If the header field is missing in the article, the
1605  * constructor for the hierarchy element in the CORE module inserts a
1606  * valid empty string.
1607  *
1608  * Note 2:
1609  * Configurations elements of string type are never 'NULL'. If no value
1610  * is found in the configfile, the FILTER module inserts valid empty
1611  * strings.
1612  */
1613 
1614  /*
1615  * This is the simple default rule
1616  * It matches to the identity configuration of the user.
1617  */
1618  if(he->header->from[0] && config[CONF_FROM].val.s[0])
1619  {
1620  if(!strcmp(he->header->from, config[CONF_FROM].val.s)
1621  || !strcmp(he->header->from, config[CONF_REPLYTO].val.s))
1622  {
1623  res = 1;
1624  }
1625  }
1626  /* Hook in more sophisticated custom code here if desired */
1627  }
1628 
1629  return(res);
1630 }
1631 
1632 
1633 /* ========================================================================== */
1634 /*! \brief Check for reply to own article
1635  *
1636  * \param[in] he Pointer to article hierarchy element
1637  *
1638  * \return
1639  * - 1 if \e he corresponds to a reply to an own article
1640  * - 0 otherwise
1641  */
1642 
1644 {
1645  int res = 0;
1646  const char* last_ref = "";
1647  size_t i;
1648 
1649  if(NULL != he->parent)
1650  {
1651  res = filter_match_own(he->parent);
1652  if(res)
1653  {
1654  /* Verify that there are no missing articles in between */
1655  if(NULL != he->header->refs)
1656  {
1657  i = 0;
1658  while(NULL != he->header->refs[i])
1659  {
1660  last_ref = he->header->refs[i++];
1661  }
1662  }
1663  if(strcmp(last_ref, he->parent->header->msgid))
1664  {
1665  /* There are missing article(s) in between */
1666  res = 0;
1667  }
1668  }
1669  }
1670 
1671  return(res);
1672 }
1673 
1674 
1675 /* ========================================================================== */
1676 /*! \brief Get article score
1677  *
1678  * \param[in] he Pointer to article hierarchy element
1679  *
1680  * \return
1681  * - Score of article
1682  * - 0 if no score is defined for article corresponding to \e he .
1683  */
1684 
1686 {
1687  int res = 0;
1688  struct filter* rule = scores;
1689  const char* data;
1690  size_t i;
1691 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
1692  int rv;
1693  const char* string;
1694  const char* p = NULL;
1695  enum enc_mime_cs charset = ENC_CS_UNKNOWN;
1696 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
1697 
1698  if(NULL != he->header)
1699  {
1700  while(NULL != rule)
1701  {
1702  /* Check whether group matches wildmat */
1703  if(!filter_group_check(rule, he->header->groups))
1704  {
1705  /* Yes => Check rule type */
1706  data = NULL;
1707  switch(rule->type)
1708  {
1709  /* ----------------------------------------------------------- */
1710  /* Literal matching */
1711  case SCORE_TYPE_FROM:
1712  {
1713  if(NULL == data) { data = he->header->from; }
1714  /* No break here is intended! */
1715  }
1716  /* FALLTHROUGH */
1717  case SCORE_TYPE_SUBJECT:
1718  {
1719  if(NULL == data) { data = he->header->subject; }
1720  if(!strcmp(rule->string, data))
1721  {
1722  res = filter_score_add(res, rule->value);
1723  }
1724  break;
1725  }
1726  /* ----------------------------------------------------------- */
1727  /* Literal matching against field element */
1728  case SCORE_TYPE_GROUP:
1729  {
1730  i = 0;
1731  /* Assignment in truth expression is intended! */
1732  while(NULL != (data = he->header->groups[i++]))
1733  {
1734  if(!strcmp(rule->string, data))
1735  {
1736  res = filter_score_add(res, rule->value);
1737  }
1738  }
1739  break;
1740  }
1741  /* ----------------------------------------------------------- */
1742  /* Extended regular expression matching */
1743  case SCORE_TYPE_FROM_ERE:
1744  {
1745  if(NULL == data) { data = he->header->from; }
1746  /* No break here is intended! */
1747  }
1748  /* FALLTHROUGH */
1749  case SCORE_TYPE_SUBJECT_ERE:
1750  {
1751  if(NULL == data) { data = he->header->subject; }
1752  /* No break here is intended! */
1753  }
1754  /* FALLTHROUGH */
1755  case SCORE_TYPE_MSGID_ERE:
1756  {
1757  if(NULL == data) { data = he->header->msgid; }
1758 #if CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB
1759  if(NULL == rule->ere)
1760  {
1761  /*
1762  * If this happens, the 'dcre' parameter of the score rule
1763  * constructor was not used correctly.
1764  */
1765  PRINT_ERROR("Regular expression not compiled (bug)");
1766  }
1767  else
1768  {
1769  rv = 0;
1770  string = data;
1771  if(FILTER_CS_ASCII == rule->cs)
1772  {
1773  rv = enc_ascii_check(string);
1774  }
1775  else if(FILTER_CS_ISO8859_1 == rule->cs)
1776  {
1777  /* Try to convert data to ISO 8859-1 */
1778  p = enc_convert_to_8bit(&charset, string, NULL);
1779  if(NULL == p) { rv = -1; }
1780  else if(ENC_CS_ISO8859_1 != charset) { rv = -1; }
1781  else { string = p; }
1782  }
1783  if(!rv && !api_posix_regexec(rule->ere, string, 0, NULL, 0))
1784  {
1785  res = filter_score_add(res, rule->value);
1786  }
1787  /* Release memory for ISO 8859-1 string */
1788  if(NULL != p && data != p) { enc_free((void*) p); }
1789  }
1790 #endif /* CFG_USE_POSIX_API >= 200112 || CFG_USE_XSI || CFG_USE_CLB */
1791  break;
1792  }
1793  /* ----------------------------------------------------------- */
1794  default:
1795  {
1796  PRINT_ERROR("Unknown type of score rule (bug)");
1797  break;
1798  }
1799  }
1800  }
1801  rule = rule->next;
1802  }
1803  }
1804 
1805  return(res);
1806 }
1807 
1808 
1809 /* ========================================================================== */
1810 /*! \brief Get codeset of locale category \c LC_CTYPE
1811  *
1812  * \return
1813  * - Codeset ID of locale category \c LC_CTYPE
1814  */
1815 
1816 enum filter_cs filter_get_locale_ctype(void)
1817 {
1818  return(filter_locale);
1819 }
1820 
1821 
1822 /*! @} */
1823 
1824 /* EOF */
core_hierarchy_element::header
struct core_article_header * header
Definition: core.h:143
fu_write_to_filedesc
int fu_write_to_filedesc(int filedesc, const char *buffer, size_t len)
Write data block to filedescriptor.
Definition: fileutils.c:552
filter_get_score
int filter_get_score(const struct core_hierarchy_element *he)
Get article score.
Definition: filter.c:1685
filter_match_own
int filter_match_own(const struct core_hierarchy_element *he)
Check for own article.
Definition: filter.c:1595
enc_free
void enc_free(void *p)
Free an object allocated by encoding module.
Definition: encoding.c:7856
fu_assign_stream
int fu_assign_stream(int filedesc, FILE **stream, const char *mode)
Assign I/O stream to open file.
Definition: fileutils.c:380
filter_get_locale_ctype
enum filter_cs filter_get_locale_ctype(void)
Get codeset of locale category LC_CTYPE.
Definition: filter.c:1816
enc_mime_cs
enc_mime_cs
IDs for supported MIME character sets.
Definition: encoding.h:59
fu_lock_file
int fu_lock_file(int filedesc)
Lock file for writing.
Definition: fileutils.c:335
filter_exit
void filter_exit(void)
Shutdown filter module.
Definition: filter.c:1341
enc_ascii_check
int enc_ascii_check(const char *s)
Verify ASCII encoding.
Definition: encoding.c:3922
config
struct conf config[CONF_NUM]
Global configuration.
Definition: conf.c:63
conf_entry_val::s
char * s
Definition: conf.h:105
core_article_header::subject
const char * subject
Definition: core.h:112
CONF_FROM
Definition: conf.h:46
CONF_SCORERC
Definition: conf.h:50
core_article_header::groups
const char ** groups
Definition: core.h:110
enc_wm_pattern
Wildmat array element (for RFC 3977 wildmat-pattern)
Definition: encoding.h:139
main_debug
int main_debug
Enable additional debug output if nonzero.
Definition: main.cxx:64
CONF_REPLYTO
Definition: conf.h:47
core_article_header::from
const char * from
Definition: core.h:111
core_hierarchy_element::parent
struct core_hierarchy_element * parent
Definition: core.h:145
fu_create_path
int fu_create_path(const char *path, api_posix_mode_t perm)
Create path.
Definition: fileutils.c:122
core_article_header::refs
const char ** refs
Definition: core.h:119
MAIN_ERR_PREFIX
#define MAIN_ERR_PREFIX
Message prefix for FILTER module.
Definition: filter.c:58
filter_check_testgroup
int filter_check_testgroup(const char *group)
Check for test group.
Definition: filter.c:1535
fu_unlink_file
int fu_unlink_file(const char *pathname)
Unlink file.
Definition: fileutils.c:362
core_hierarchy_element
Node in article hierarchy.
Definition: core.h:136
PRINT_ERROR
#define PRINT_ERROR(s)
Prepend module prefix and print error message.
Definition: main.h:19
data
struct core_data data
Global data object (shared by all threads)
Definition: core.c:242
enc_destroy_wildmat
void enc_destroy_wildmat(struct enc_wm_pattern **obj, int num)
Destroy wildmat pattern array.
Definition: encoding.c:4516
xdg_get_confdir
const char * xdg_get_confdir(const char *)
Get configuration directory.
Definition: xdg.c:116
xdg_append_to_path
int xdg_append_to_path(const char **, const char *)
Append path component to buffer.
Definition: xdg.c:55
enc_create_wildmat
int enc_create_wildmat(struct enc_wm_pattern **obj, const char *wm)
Create wildmat pattern array.
Definition: encoding.c:4349
filter_init
int filter_init(int utf8)
Initialize filter module.
Definition: filter.c:1051
fu_sync
int fu_sync(int filedesc, FILE *stream)
Flush buffers of file.
Definition: fileutils.c:409
fu_close_file
void fu_close_file(int *filedesc, FILE **stream)
Close file (and potentially associated I/O stream)
Definition: fileutils.c:297
conf::val
union conf_entry_val val
Definition: conf.h:113
FILTER_PERM
#define FILTER_PERM
Permissions for score file.
Definition: filter.c:61
CONF_TESTGRP_ERE
Definition: conf.h:61
filter_match_reply_to_own
int filter_match_reply_to_own(const struct core_hierarchy_element *he)
Check for reply to own article.
Definition: filter.c:1643
ENC_CS_ISO8859_1
Definition: encoding.h:63
enc_convert_to_8bit
const char * enc_convert_to_8bit(enum enc_mime_cs *charset, const char *s, const char **cs_iana)
Convert string from Unicode (UTF-8 NFC) to an 8bit character set.
Definition: encoding.c:4991
core_article_header::msgid
const char * msgid
Definition: core.h:109
fu_read_whole_file
int fu_read_whole_file(int filedesc, char **buffer, size_t *len)
Read text file content and store it into memory buffer.
Definition: fileutils.c:452
fu_open_file
int fu_open_file(const char *pathname, int *filedesc, int mode, api_posix_mode_t perm)
Open file.
Definition: fileutils.c:246
print_error
void print_error(const char *)
Print error message.
Definition: main.cxx:276

Generated at 2026-01-27 using  doxygen