# DNEWS 5.n - Filter Rules
# The author is solely responsible for the contents of this file.
#
# Comments, corrections and additions welcome
# Contact: Doug Mackall
# Last modified : July 5, 1999
#
# NOTES:
#   Will filter misplaced binaries and HTML articles by default.
#   Every effort has been made to eliminate false positives, but
#   it's almost certain that some will occur.
#
# History:
#
# 3/29/99
#   Removed fascist Subject: line filters using $sex
#   Updated badguys expression to reflect current crop of spammers
#   Expanded $bin_allowed to include stupid spellings of "binaries"
#
# 4/09/99
#   Added reject for isimage to supplement binary filter
#   Changed $poison_groups
#
# 4/12/99
#   Fixed admin & reports groups
#
# 4/16/99
#   Additions to badguys regex
#   Expanded $tlds
#   Added crude but effective 'pheremone' spam filter
#
# 5/30/99
#   Added shunned sites list
#
# 6/2/99
#   Began adding article scoring
#   Added body keyword search w/scoring
#
# 6/17/99
#   Added message-id and NPH checking
#   Added code to drop spam cancels for some filtered articles

# ---------------------------------------------------------------------
# Configuration flags
# ---------------------------------------------------------------------

# Binary / HTML blocking switches (tested with isflag() by the article
# checks further down in this file).
setflag("config_block_binaries")
setflag("config_binaries_in_mod_groups")
setflag("config_block_mime_html")
setflag("config_block_html")

# Control-message handling switches.
setflag("config_block_late_cancels")
setflag("config_drop_control_with_supersedes")
setflag("config_drop_useless_controls")
setflag("config_drop_ihave_sendme")

# Enable the spambot-signature checks and the shunned-site check.
setflag("do_bot_checks")
setflag("shun")

# Start every article with zeroed score values.
set("score",0)
set("mmf_score",0)

# ---------------------------------------------------------------------
# Basic building blocks
# ---------------------------------------------------------------------

# Character class for one alphanumeric (or underscore) character.
$w = "[a-zA-Z0-9_]"

# Numeric limits (stored as strings, as in the rest of this file).
$max_encoded_lines = "15"
$maxcross = "14"

# ---------------------------------------------------------------------
# Sites to shun (Passive UDP)
# ---------------------------------------------------------------------
#
# Why these paths are shunned:
#   - sol.net!colby.direct.ca is an impossible path that certain
#     spammers forge; the same goes for the dsrs.nntp.sol.net path.
#   - colby.direct.ca is not a production machine and has no outbound
#     feeds, so it should never show up in any path.
$config_shunned_sites = "sol\.net!colby\.direct\.ca|dsrs\.nntp\.sol\.net!newsfeed"

# binaries allowed if groups match
# Wildcard list tested with matchall() against Newsgroups: binaries (and
# images) are rejected unless the groups match it.
# Fixed: "binaeries*" lacked the leading "*" that the other misspelling
# patterns carry, so it could only match at the very start of a group name.
$bin_allowed = "*binaries*,*binaires*,*bainaries*,*nospam*,*binaeries*,alt.sex.pictures*,fur.artwork*,alt.anonymous.messages*,de.alt.dateien*,rec.games.bolo,comp.security.pgp.test,sfnet.tiedostot"

# reject all articles crossposted to groups matching this
# Fixed: this list is tested with matchone(), like the other wildcard group
# lists in this file, but two of its entries were written in regular
# expression syntax ("^alt\.sex\.cancel"). They are now plain wildcard
# patterns so they can actually match a group name.
$poison_groups = "alt.sex.cancel,alt.binaries.pictures.erotica.cancel,alt.sexzilla*"

# no checks done if groups match $skipall
$skipall = "clari.*,biz.clarinet.*"

# exceptions to spam rules
# see http://netwinsite.com/dnews/rules.htm Special Flags section
$skipdup = ""
$skipfrom = ""
$skipfilter = ""

# HTML allowed here (if config_block_html is set)
$html_allowed = "microsoft.*"

# domains starting/ending in "xxx" are never good news
# (checked against .com, .net, and .nu tld's only)
$config_baddomainpat = $w+"+xxx|xxx"+$w+"+"

# block cancels with these in the path
$config_bad_cancel_paths = "winternet\.com|news\.ifcss\.org|ftp\.uu\.net" \
 + "|netvigator\.com|vsnl\.net\.in|pacific\.net\.sg" \
 + "|relay\.nakhodka\.ru|hipcancel|h1pcr1me|news\.pace\.edu" \
 + "|news\.ncd\.co\.jp|ubc\.co\.jp|paradise\.co\.jp" \
 + "|news\.softandco\.fr|news4\.netsgo\.com|riksbyggen\.se|crl\.aecl\.ca"

# net-abuse groups get some special treatment
$net_abuse_groups = "news.admin.net-abuse.usenet,de.admin.net-abuse," \
 + "news.admin.net-abuse.bulletins"

# groups expected to contain bodies and/or subject lines from spam
# Fixed: removed a trailing comma that left an empty entry at the end of
# the list.
$spam_report_groups = "news.admin.net-abuse.sightings,de.admin.net-abuse," \
 + "fr.usenet.abus.rapports,news.lists.filters," \
 + "alt.nocem.misc,news.admin.net-abuse.bulletins"

# used to form domain names for filtering
# (combined below with $tlds as "(" + $config_badguys + ")\.(" + $tlds + ")")
$config_badguys = "espdirect|esphoto" \
 + "|hardcoreonline|tiffini|highheel|jaysnet|sugarboy[sz]" \
 + "|clit|fuck|69|aardvarks|samantha-at-home|\@duno" \
 + "|backdoor|nospam\.college-girls|somethingfunny" \
 + "|adultsex|snatchpatch|clubteensex|Femwear" \
 + "|("+$w+"+\.)?barditt?ch|"+$w+"+\.quim|holowww|"+$w+"+\.holowww" \
 + "|answerme|latexfetish|nymphette|bondage|6t9|nudesights" \
 + "|porngodess|phatt|rawxxxfun|porn.?king|dreamlands|youwish|uwish" \
 + "|dirtysecrets|harddicks|"+$w+"+\.mnet1|pictureview|postagent" \
 + "|malebytes|southcorp|ucla\.dorms|bmc-engineering|orchidvideos" \
 + "|sexplosion|jalapeno|adammale" \
 + "|forbiddenphotos|simplecom|mallpage|yes-pheromones|4jon" \
 + "|headhunter|conline|adultserv|theadultstore|femaleseduction|thescent" \
 + "|crystalsplace|clit|phone.?sluts|hard.?core|panties|barginbimbos|wank.?it" \
 + "|spank.?it|stroke.?it|whack.?it|sluts4u|babelicious|sluts|dunny.?sluts" \
 + "|jamieonline|plaything|uksx\.websx|lunar-empire" \
 + "|strippers|horny|virgins|ukgirls" \
 + "|perverts-r-us|slaveboy\.queenbee|villageteens" \
 + "|babes|lickme|xxxhyperlinks" \
 + "|hollywoodunderground|cyber-legs"

# Spam domain/Spambot Message-IDs for quick vaporization of spam, along with their
# $alz cancels
#
# Fixed in this list:
#   - "\fuck\.com" began with the escape "\f" instead of a literal "f";
#     it is now "fuck\.com".
#   - "town.\girl" had its backslash on the wrong side of the dot; it is
#     now "town\.girl".
#
# NOTE(review): the five alternatives written as "/\@(...)\b/" contain
# literal "/" characters, so they look like they can only match a
# Message-ID that has slashes around the "@name" part. They are left
# untouched because dropping the slashes would activate fairly broad
# patterns (e.g. "@Live", "@Maria") -- confirm the intent before changing.
$config_bad_mids = "porno\.com|sweetbox\.com|\@hornyslut\.com" \
 + "|europorn\.com|\@slut.?\.com|\@ukams\.com|no(where|ne)\d+\.yet|\@pron\.com" \
 + "|ladybwear|badpuppy\.com|loving.?\.sexnow.?\.ya" \
 + "|freepictures|jus.?.?\.doi.?.?\.to|great\.site|webbinaries" \
 + "|yad.?.?.?\.ion.?.?\.org|freehidden|joy.?.?\.to.?.?\.al|from.?behind" \
 + "|love(youhon|ergirl|chatting|stofuck)|forever\.yours|\@ju.?.?\.sex|town\.girl|beachbums" \
 + "|perverts\-r\-us\.com|fantasyart|amateur|mycock|pornfeeds" \
 + "|\@clit\.|bigtits|officegirl.?\.co|redheads|anonymousmessages|amheav(en)?\.co" \
 + "|pornpi(x|cs)|fuckme|gay(PerView|mexico)|bodylinks" \
 + "|sex(mail|.?xy)|free4u|\@for\.you|fuck\.com|just\.in\.case|get\.it\.on" \
 + "|assfuck|cumsee|loulou\.lo|time\.to\.fly|\.cum\.com|titpatrol|\@Geisha" \
 + "|goodfuck|fuckingslut|forgoodtime|goodjob\.nice|in.?.?\.and\.out.??" \
 + "|mlm_world|(cash_|\@)mlm\.com|dirtydiana|(the|sex)shoppe|adultoz" \
 + "|compu-scans|wilddate|u4me|letsmakelove" \
 + "|myhideout|truetoyou|littlegirls|sexy(one|_girl_need|surfer)|teenangel" \
 + "|lickme|livecam|eurosex|(bi|sub|dom)-(f|m|cpl)|greatlover|freesex" \
 + "|bemybaby|ineedu|desireme|dream(g(uy|al|irl)|mall)|takeme|onlyus|iwantitbad" \
 + "|(www|post|mail|MS|virtual|serv)\.(\d*spinter|\d*compuserver|\d*msnn|\d*aoll|\d*primer)\.(com|net)" \
 + "|seducewomen|ultrapi(c|x)|seemeeather|(wet|lickmy)pussy|cum.?in.?m" \
 + "|wicked-dreams|celebs\.n\.models|iowntheweb" \
 + "|me4u|ohbaby|cyberdream\.com|HornyChristy" \
 + "|longdong\.com|cheerleader|bigcockwanted|eatmypu[sz][sz]y|rudehouse" \
 + "|pixmachine|yourmomshouse\.cum|the(hornypound|clubx)\.com|cybere[xr]otica" \
 + "|/\@(Elena|Tamara|bitch|Donna|Katty|Narko|Samantha|Chick|Paula)\b/" \
 + "|/\@(Stasy|Kara|Karina|Stefany|Diana|Blyat|Maria|Honey|Polina)\b/" \
 + "|/\@(Live|Jienna|Jennifer|Alicia|Selena|Barbara|Svetlana|Suka|suzy|Matrisha)\b/" \
 + "|/\@(leanna|pron|Kissa|Janet|Silvia|Arletta|Melody|Alicia|lucy|Pussycat)\b/" \
 + "|/\@(Jenna|O-doll|this\.site|oreo)\b/|videodungeon|8teens\.com|stockingheaven" \
 + "|wildthing\.com|pixnews99\.com|adameve\.com|NaughtyNikki|SexStreet" \
 + "|JanesJungle|ChicsonChics|Voyeur|PeeCam|HiddenPee|Diamond-Dolls" \
 + "|SeMe(Shower|Pee|Tan)|(whore|brit)boyz|SecretaryCam|SuitGuy|orientalsexsecrets"

# Source Hosts we want nothing to do with
$config_bad_nph = "^203\.197|^202\.54\."

# some stuff for regexps
$tlds = "com|net|org|edu|nl|de|no|dk|ch|com\.au|se|co|cum|c0"
$ip = "\d{2,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b"

# URL patterns: scheme (or "www"), then either host.tld or a dotted IP.
# Fixed: $tlds is an alternation and was spliced in without its own group,
# so only "com" was preceded by the host-name part and every other TLD
# matched as a bare word. The TLD list is now wrapped in parentheses.
$url = "([Hh][Tt][Tt][Pp]:\/\/?|[Ww]{3})(["+$w+"\-\.]+\.(" + $tlds + ")|" + $ip + ")"
$url2 = "[Hh][Tt][Tt][Pp]:\/\/?(["+$w+"\-\.]+\.(" + $tlds + ")|" + $ip + ")"

# hand-rolled case-insensitive spellings of some keywords
$ci_begin = "[Bb][Ee][Gg][Ii][Nn]"
$ci_ctype = "[Cc][Oo][Nn][Tt][Ee][Nn][Tt]-[Tt][Yy][Pp][Ee]"
$ci_cte = "[Cc][Oo][Nn][Tt][Ee][Nn][Tt]-[Tt][Rr][Aa][Nn][Ss][Ff][Ee][Rr]-[Ee][Nn][Cc][Oo][Dd][Ii][Nn][Gg]"
$ci_txht = "([Tt][Ee]?[Xx][Tt]|[Hh][Tt][Mm][Ll]?|[Uu][Rr][Ll])"
$ci_html = "[Tt][Ee][Xx][Tt]\/[Hh][Tt][Mm][Ll]"
$base64_chars = "[A-Za-z0-9\+\/]"

# some nifty regular expressions
# NOTE: $sex, $desc1, $sex_adjs and $site_desc are bare alternations; wrap
# them in parentheses whenever something is appended to them.
$sex = "fuck|xxx|sex"
$free = "free(?!dom|bsd|nix|serve)"
$pics = "pi[cx]"
$desc1 = "hard.?core|teen|asian|extreme|live|outrageous|nasty|awesome|" + $free + "|adult"
$sex_adjs = $desc1 + "|" + $sex + "|erotic|gay|amateur|lesbian|blow.?job|fetish|pre.?teen|nude" \
 + "|celeb|school.?girl|bondage|rape|torture|anal|facial|scat|bisexual" \
 + "|masturbat(e|ion|ing)|masterbat(e|ion|ing)|incest|horny" \
 + "|p.?h.?e.?r.?e.?m.?o.?n.?e|trans.?sexual"
$site_desc = $desc1 + "|password"
$servPre = "(" + $free + "|cheap|unlimited|nationwide|" + $site_desc + ")"
$servPost = "(" + $free + "|minute|samples|800|900|no.?charge)"
$servStr = "(phone.{0,15}(" + $sex + "|fun)|(adult|r.?a.?p.?e|" + $sex + ").{0,10}(chat|site)" \
 + "|(" + $sex + ").{0,15}(show|call|connection|vid(eo|s))" \
 + "|hard.?core.(vid(eo|s)|amateur)|900.dateline|(mass|bulk).e?-?mail)"
$services = "(" + $servPre + ".{0,30}" + $servStr + ")|(" + $servStr + ".{0,30}" + $servPost + ")"
$free_stuff = $free + ".{0,20}(password|membership|" + $pics + "|chat)" \
 + "|(100\%|total|complete|absolut|all).{0,15}" + $free \
 + "|no.{0,6}(a(ge|dult).(verification|check)|avs)"
$porn = "(" + $sex_adjs + ").{0,25}(" + $pics + "|video|image|porn|photo|mpeg)"

# Pheremone/seduction spammer stuff
$pheremone = "s.?e.?d.?u.?c.?e.? women.? easy|more.? women.? easily" \
 + "|subliminal music messages|s.?e.?d.?u.?c.?e.? more.? women" \
 + "|want.? more.? dates.?|m.?e.?e.?t.? and.? d.?a.?t.?e.? more" \
 + "|a.?t.?t.?r.?a.?c.?t.? more.? women|v.?i.?g.?o.?r.?e.?x|H.?E.?R.?B.?A.?L.? V.?I.?A.?G.?R.?A"
$pheremone_case = "P.?H.?E.?R.?M.?O.?N.?E.?S|A.?P.?H.?R.?O.?D.?I.?S.?I.?A.?C.?"

# ---------------------------------------------------------------------
# Checks applied to every item
# ---------------------------------------------------------------------

# process shun, nph and mid lists first. Gets articles and controls.
if (isflag("shun")) then
    if (rexp("Path",$config_shunned_sites)) reject "=== Shunned site"
end if
if (rexp("Message-ID",$config_bad_mids)) reject "=== Spam M-ID"
if (rexp("NNTP-Posting-Host",$config_bad_nph)) reject "=== Bad Posting Host"

# ---------------------------------------------------------------------
# Ordinary (non-control) articles
# ---------------------------------------------------------------------

if (!exists("Control")) then
    # process non-control articles

    # Subject-line filters
    if (rexp("Subject",$pheremone)) reject "=== Pheremone Spam"
    if (rexp_case("Subject",$pheremone_case)) reject "=== Pheremone Spam #2"
    if (rexp("Subject",$services)) reject "=== Subject in $services"
    # Fixed: $site_desc is a bare alternation, so without the parentheses
    # ".{0,20}site" attached only to its last alternative ("password") and
    # any Subject merely containing "teen", "live", "adult", ... matched.
    if (rexp("Subject","(" + $site_desc + ").{0,20}site")) reject "=== Subject in $site_desc.{0,20}site"
    if (rexp("Subject", "(" + $free_stuff + "|" + $porn + ")")) reject "=== Subject in $free_stuff|$porn"

    # Exception flags (see the Special Flags section referenced above)
    if (matchall("newsgroups",$skipall)) then
        setflag("skip_filter")
        setflag("skip_from")
        setflag("skip_dup")
    end if
    if (matchall("newsgroups",$skipdup)) setflag("skip_dup")
    if (matchall("newsgroups",$skipfilter)) setflag("skip_filter")
    if (matchone("From",$skipfrom)) setflag("skip_from")

    # HTML / binary / image posts outside the groups that allow them
    if (ishtml()) and (!matchall("newsgroups",$html_allowed)) reject "=== HTML not in $html_allowed groups"
    if (isbinary()) and (!matchall("newsgroups",$bin_allowed)) reject "=== Binary not in $bin_allowed groups"
    if (isimage()) and (!matchall("newsgroups",$bin_allowed)) reject "=== Binary Image not in $bin_allowed groups"
    if (matchone("newsgroups",$poison_groups)) reject "=== Item is in $poison groups"

    # Classify the newsgroups; the gr_* flags are tested further down
    if (matchall("newsgroups",$net_abuse_groups)) setflag("abuse")
    if (matchall("newsgroups",$spam_report_groups)) setflag("reports")
    if (matchone("newsgroups","news.answers")) setflag("faq")
    if (allmod("newsgroups")) setflag("gr_allmod")
    if (matchall("newsgroups",$bin_allowed)) setflag("gr_binary")
    if (matchall("newsgroups",$html_allowed)) setflag("gr_html")

    # A Supersedes header acts like a cancel, so apply the cancel path list
    if (exists("Supersedes")) then
        if (rexp("Path",$config_bad_cancel_paths)) reject "=== Supersede from bad cancel path"
    end if

    # Known spambot signatures
    if (isflag("do_bot_checks")) then
        if (rexp("Organization","email\s+platinum")) reject "=== Bot signature - Platinum"
        if (rexp("X-Newsreader","^2\.\d\.(\d\d? [A-Z]|\d\d?)$")) reject "=== Bot signature - 2.0.x"
        if (rexp("X-Newsreader","newsgroup\s+bulk\s+mailer")) reject "=== Bot signature - Bulk Mailer"
        if (isin("X-Newsreader","calvacade 98")) reject "=== Bot signature - Calvacade 98"
        if (isin("X-Newsposter","atomicpost")) reject "=== Bot signature - AtomicPost"
        if (rexp("Message-ID","<\d{12}\@[A-Z]{10}>|\@\d+>")) reject "=== Bot signature - Message-ID pattern"
        if (rexp("Organization","")) and \
           (rexp("Message-ID","<(\d{12}|\d{8}\.\d{4})\@")) reject "=== Bot signature - Org/MID bot pattern"
        if (rexp("Message-ID","msgidabcxyz\.com")) reject "=== Bot signature - msgidabcxyz"
        if (rexp("Message-ID","none\d+\.yet>")) reject "=== Bot signature - none##.yet"
        if (rexp("Message-ID","nowhere\d+\.yet>")) reject "=== Bot signature - nowhere##.yet"
        if (rexp("Organization","^[A-Z]{2,3}\sInc\.\s*$")) reject "=== Bot signature - Adult sights"
        if (rexp("Organization","repost.*unauthorized.*cancel")) reject "=== Bot signature - Repost"
    end if

    # Spam domains: <badguy>.<tld> in any of four headers.
    # Fixed: the Organization pattern was built as "((\b" + ... so the
    # word boundary applied only to the first $config_badguys alternative;
    # it now uses the same "(\b(" + ... grouping as the From and
    # NNTP-Posting-Host checks.
    if (rexp("organization","(\b(" + $config_badguys + ")\.(" + $tlds + ")\b)")) reject "=== Spam domain"
    if (rexp("from","(\b(" + $config_badguys + ")\.(" + $tlds + ")\b)")) reject "=== Spam domain"
    if (rexp("NNTP-Posting-Host","(\b(" + $config_badguys + ")\.(" + $tlds + ")\b)")) reject "=== Spam domain"
    if (rexp("message-id","(\b(" + $config_badguys + ")\.(" + $tlds + ")>)")) reject "=== Spam domain"

    if (lines()>8000) and (size()<40000) reject "=== Lots of short lines"

    # uuencoded html, text, url files
    if (lines()>3) and (lines()<350) then
        if (isencodedtext()) reject "=== UUEncoded text/html"
        if (isencodedurl()) reject "=== UUEncoded url"
    end if

    # binaries in non-binary newsgroups
    # Both inner checks apply the same test; the first runs only when
    # config_binaries_in_mod_groups is NOT set, the second only when the
    # groups are not all moderated (gr_allmod not set).
    if (isflag("config_block_binaries")) then
        if (!isflag("config_binaries_in_mod_groups")) then
            if (lines() > $max_encoded_lines) then
                if (!isflag("gr_binary")) and (isbinary()) reject "=== Binary in non-binary group"
            end if
        end if
        if (!isflag("gr_allmod")) then
            if (lines() > $max_encoded_lines) then
                if (!isflag("gr_binary")) and (isbinary()) reject "=== Binary in non-binary group"
            end if
        end if
    end if

    # mime-encapsulated HTML
    if (isflag("config_block_mime_html")) then
        if (rexp("Content-Disposition","filename.*\.html?")) reject "=== Misc HTML spam"
        if (rexp("Content-Base","file:.*\.html?")) reject "=== Misc HTML spam"
        if (rexp("content-type","multipart\/(mixed|related)")) and \
           (isencodedhtml()) \
           reject "=== Misc HTML spam"
    end if

    # HTML (and multipart/alternative)
    if (isflag("config_block_html")) then
        if (!isflag("gr_html")) then
            if (rexp("content-type","text\/html|multipart\/alternative")) reject "=== HTML post"
            if (rexp("content-type", "multipart\/(mixed|related)")) and \
               (isencodedhtml()) \
               reject "=== HTML post"
        end if
    end if

    # if we got past all those then accept the message
    accept "Article OK"
end if

# ---------------------------------------------------------------------
# Control messages
# ---------------------------------------------------------------------

# process articles that contain control headers

# examine cancel messages
if (rexp("Control","^\s*cancel")) then
    # Ignore spam cancels for shunned sites
    if (isflag("shun")) then
        if (exists("X-Original-Path")) then
            if (rexp("X-Original-Path",$config_shunned_sites)) reject "=== Cancel for shunned site"
        end if
    end if
    # Likewise for cancels of articles from unwanted posting hosts
    if (exists("X-Original-NNTP-Posting-Host")) then
        if (rexp("X-Original-NNTP-Posting-Host",$config_bad_nph)) reject "=== Cancel for $bad_nph"
    end if
    if (isflag("config_drop_control_with_supersedes")) then
        if (exists("Supersedes")) reject "=== Cancel with Supersedes header"
    end if
    if (rexp("Organization","HipCrime")) reject "=== Rogue cancel (HipCrime)"
    if (rexp("From","HipCrime")) reject "=== Rogue cancel (HipCrime)"
    if (rexp("Path","((hacker|crack|porn|cripple|gimp|cunt|hole|fag|aids|faq|god|hindu|dothead|jew|kike|moslem|towelhead|nazi|kraut|nerd|geek|nigger|redneck|rice|slanteye|spick|whine)cancel|cyberwhin(er|ing))")) \
        reject "=== Rogue cancel"
    if (rexp("Path",$config_bad_cancel_paths)) reject "=== Rogue cancel - path"
    if (rexp("NNTP-Posting-Host",$config_bad_cancel_paths)) reject "=== Rogue cancel - posting-host"
    # When present, the cancelled-by header (either spelling) must contain
    # something shaped like "x@y"
    if (exists("X-Cancelled-By")) then
        if (!rexp("X-Cancelled-By",$w+"\@"+$w)) reject "=== Rogue cancel - Bad X-Cancelled-By"
    end if
    if (exists("X-Canceled-By")) then
        if (!rexp("X-Canceled-By",$w+"\@"+$w)) reject "=== Rogue cancel - Bad X-Canceled-By"
    end if
end if

# examine new/rmgroup control messages
if (rexp("Control","^\s*(new|rm)group\s")) then
    if (isflag("config_drop_control_with_supersedes")) then
        if (exists("Supersedes")) reject "=== New/Rmgroup with Supersedes header"
    end if
    if (rexp("Distribution","collabra-internal")) reject "=== Bogus control message from Collabra luser"
    if (rexp("body","Control message generated by Netscape Collabra Server")) reject "=== Bogus control message from Collabra luser"
    if (rexp("Control","(new|rm)group\s(comp|misc|news|rec|soc|sci|humanities|talk)")) then
        # Big 8 hierarchies: only group-admin@isc.org may issue these
        if (!rexp("From","group-admin\@isc\.org")) reject "=== Big 8 control message from wrong address"
        if (rexp("Organization","Cabal")) reject "=== Bogus big-8 control message (Cabal)"
        if (rexp("body","Meow")) reject "=== Bogus big-8 control message (Meow)"
    else
        # Outside the Big 8 those same addresses indicate a forgery
        if (rexp("From","(group-admin|tale)\@isc\.org|tale\@uunet\.uu\.net")) \
            reject "=== Forged non-big-8 control message supposedly from tale"
    end if
    if (!exists("Approved")) reject "=== Unapproved control message"
end if

# examine other control messages
if (rexp("Control","^\s*(sendsys|senduuname|version)")) then
    if (isflag("config_drop_useless_controls")) reject "=== Bad control message - Unwanted message"
    if (isflag("config_drop_control_with_supersedes")) then
        if (exists("Supersedes")) reject "=== Control message with Supersedes header"
    end if
end if

if (rexp("Control","^\s*(ihave|sendme)")) then
    if (isflag("config_drop_ihave_sendme")) reject "=== Bad control message - Unwanted message"
    if (isflag("config_drop_control_with_supersedes")) then
        if (exists("Supersedes")) reject "=== Control message with Supersedes header"
    end if
end if

# if we got past all those then accept the control article
accept "Control OK"