将nginx日志中的蜘蛛记录删除掉

#!/bin/sh
#
# Strip search-engine spider / crawler requests from an nginx access log.
#
# Usage: sh <this-script> LOGFILE
#
# LOGFILE is edited in place. For every known crawler signature the script
# prints either "match <name> <count> line." or "<name> not found.".
# Signatures are applied one after another, so a line carrying several
# signatures is counted only for the first one that appears in the list.
#
# Requires a sed that accepts "-i" without a backup suffix (GNU sed).

# purge_spider LABEL PATTERN LOGFILE
#   Count the lines of LOGFILE that match PATTERN (a basic regular
#   expression), delete them in place and report the result under LABEL.
#   Returns non-zero only when sed fails to rewrite LOGFILE.
purge_spider() {
    ps_label=$1
    ps_pattern=$2
    ps_log=$3

    # grep -c still prints "0" when nothing matches (exit status 1); the
    # ${...:-0} fallback only covers the case where grep printed nothing.
    ps_count=$(grep -c -- "$ps_pattern" "$ps_log")

    if [ "${ps_count:-0}" -gt 0 ]; then
        # "|" is used as the address delimiter so that patterns containing
        # "/" (URLs) need no escaping.
        if ! sed -i -e "\\|${ps_pattern}|d" -- "$ps_log"; then
            printf '%s\n' "failed to remove $ps_label lines from $ps_log" >&2
            return 1
        fi
        printf '%s\n' "match $ps_label $ps_count line."
    else
        printf '%s\n' "$ps_label not found."
    fi
}

# clean_spider_log LOGFILE
#   Run every crawler signature against LOGFILE, in a fixed order.
#   Returns 0 normally and 1 if any in-place deletion failed.
clean_spider_log() {
    csl_log=${1-}
    csl_status=0

    # Quoting matters: with an unquoted empty argument the test degrades to
    # `[ -r ]`, which is always true.
    if [ ! -f "$csl_log" ] || [ ! -r "$csl_log" ]; then
        # Printed on stdout with status 0 so existing callers see no change.
        printf '%s\n' "please, input your log file"
        return 0
    fi

    # Each entry is either "signature" or "label|signature" when the name
    # shown to the user differs from the text searched for. Signatures are
    # basic regular expressions, so "." matches any single character.
    for csl_entry in \
        'Baiduspider' \
        'YandexBot' \
        'bingbot' \
        'EasouSpider' \
        'Googlebot' \
        'Mediapartners-Google' \
        'Yahoo' \
        'YoudaoBot' \
        'XoviBot' \
        'MSNBot' \
        'Sogou web spider' \
        'JikeSpider' \
        'proximic' \
        '360Spider' \
        '360spider-image' \
        'YYSpider' \
        'AhrefsBot' \
        'msnbot' \
        'YisouSpider' \
        'Facebot' \
        'GrapeshotCrawler' \
        'WeSEE' \
        'Sogou inst spider' \
        'WebZIP/x.x|www.spidersoft.com' \
        'baidu mobile spider|www.baidu.com/search/spider.html' \
        'ChinasoSpider' \
        'www.xxx.com' \
        'SEOENGWorldBot' \
        'Mail.RU_Bot' \
        'Girafabot' \
        'DotBot' \
        'BLEXBot'
    do
        # Without a "|" both expansions leave the entry untouched, so the
        # signature doubles as its own label.
        csl_label=${csl_entry%%|*}
        csl_pattern=${csl_entry#*|}
        purge_spider "$csl_label" "$csl_pattern" "$csl_log" || csl_status=1
    done

    return "$csl_status"
}

clean_spider_log "$@"

将nginx日志中的蜘蛛记录删除掉

时间: 2024-10-12 16:10:57

将nginx日志中的蜘蛛记录删除掉的相关文章

Nginx 日志中记录cookie

因开发要求,在Nginx日志中需要记录Cookie信息,以便开发查询系统发生了什么,我的日志是以json格式显示,需要在nginx.conf文件中添加如下信息: 1,#vim  nginx.conf http {    include       mime.types;    default_type  application/octet-stream;   log_format logstash_json '{"@timestamp":"$time_iso8601"

在nginx日志access log可以记录POST请求的参数值

1)      在nginx日志access log可以记录POST请求的参数值 实现程度:日志中可以显示POST请求所提交的参数值 问题: 日志中文显示十六进制(在配置文件中配置中文也无效) 没有对json数据进行测试,正文类型为:Content-Type: application/x-www-form-urlencoded; charset=UTF-8 配置说明: log_format指令用来设置日志的记录格式,语法: log_format name format {format ...}

nginx日志中$request_body 十六进制字符(\\x22) 引号问题处理记录

在使用nginx记录访问日志时,发现在含有 request_body 的 PUT , POST 请求时,日志中会含有 x22 x9B x5C x09 x08 字符,不利于阅读和处理. 具体 支持 request_body 的http method参见 http1.1定义 9 Method Definitions 和 Payloads of HTTP Request Methods nginx.conf 默认access_log 配置 log_format main '$remote_addr -

Nginx日志中request_body为空

部署Nginx,查看Nginx日志的时候,发现request_body的值没有记录下来 Nginx日志: 192.168.1.1--2016-02-24T13:33:54+08:00POST /rate_plan HTTP/1.12002----0.0020.701192.168.1.1--2016-02-24T13:33:54+08:00POST /rate_plan HTTP/1.12002----0.0010.617192.168.1.1--2016-02-24T13:37:44+08:0

awk分析nginx日志中响应时间的方法

针对响应时间慢的问题,我们在nginx日志格式中增加响应时间,现在需要针对响应时间进行分析,查找出相对较慢的响应时间. 1.确认下日志文件格式 日志格式: log_format main '$remote_addr - $remote_user [$time_local] "$request" ' '$status $body_bytes_sent "$http_referer" ' '"$http_user_agent" $http_x_for

用shell分析nginx日志百度网页蜘蛛列表页来访情况

#!/bin/bash #desc: this scripts for baidunews-spider #date:2014.02.25 #testd in CentOS 5.9 x86_64 #saved in /usr/local/bin/baidu-web.sh #written by [email protected] www.zjyxh.com dt=`date -d "yesterday" +%m%d` if [ $1x != x ] ;then   if [ -e $1

nginx日志中得到访问量最高前10个IP

~ cat access.log.10 | awk '{a[$1]++} END {for(b in a) print b"\t"a[b]}' | sort -k2 -r | head -n 10 163.177.71.12   972 101.226.68.137  972 183.195.232.138 971 50.116.27.194   97 14.17.29.86     96 61.135.216.104  94 61.135.216.105  91 61.186.190

Nginx 日志记录post数据,并使用goaccess进行日志分析

nginx日志默认不会记录post数据 在nginx配置文件的http节 log_format 日志格式标识 [escape=json] 日志格式 比如:日志格式标识设置为main,添加escape=json以便中文正确显示(注意,escape=json 需要 nginx 1.11.8 以上版本才支持),记录post和cookie的请求的详细信息 log_format main escape=json '$remote_addr [$time_local] "$request" $st

Flume采集Nginx日志到HDFS

下载apache-flume-1.7.0-bin.tar.gz,用 tar -zxvf 解压,在/etc/profile文件中增加设置: export FLUME_HOME=/opt/apache-flume-1.7.0-bin export PATH=$PATH:$FLUME_HOME/bin 修改$FLUME_HOME/conf/下的两个文件,在flume-env.sh中增加JAVA_HOME: JAVA_HOME=/opt/jdk1.8.0_121 最重要的,修改flume-conf.pr