首页 > 试题广场 >

Nginx日志分析1-IP访问次数统计

[编程题]Nginx日志分析1-IP访问次数统计
  • 热度指数:16345 时间限制:C/C++ 1秒,其他语言2秒 空间限制:C/C++ 256M,其他语言512M
  • 算法知识视频讲解
假设 Nginx 的日志存储在 nowcoder.txt 里,内容如下:
192.168.1.20 - - [21/Apr/2020:14:27:49 +0800] "GET /1/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.21 - - [21/Apr/2020:15:27:49 +0800] "GET /2/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.22 - - [21/Apr/2020:21:27:49 +0800] "GET /3/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.23 - - [21/Apr/2020:22:27:49 +0800] "GET /1/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.24 - - [22/Apr/2020:15:27:49 +0800] "GET /2/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.25 - - [22/Apr/2020:15:26:49 +0800] "GET /3/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.20 - - [23/Apr/2020:08:27:49 +0800] "GET /1/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.21 - - [23/Apr/2020:09:20:49 +0800] "GET /1/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.22 - - [23/Apr/2020:10:27:49 +0800] "GET /1/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.22 - - [23/Apr/2020:10:27:49 +0800] "GET /1/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.20 - - [23/Apr/2020:14:27:49 +0800] "GET /1/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.21 - - [23/Apr/2020:15:27:49 +0800] "GET /2/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.22 - - [23/Apr/2020:15:27:49 +0800] "GET /3/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.25 - - [23/Apr/2020:16:27:49 +0800] "GET /1/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.24 - - [23/Apr/2020:20:27:49 +0800] "GET /2/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.25 - - [23/Apr/2020:20:27:49 +0800] "GET /3/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.20 - - [23/Apr/2020:20:27:49 +0800] "GET /1/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.21 - - [23/Apr/2020:20:27:49 +0800] "GET /1/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.22 - - [23/Apr/2020:20:27:49 +0800] "GET /1/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.22 - - [23/Apr/2020:15:27:49 +0800] "GET /1/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.21 - - [23/Apr/2020:20:27:49 +0800] "GET /1/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
现在需要编写 Shell 脚本,统计 2020 年 4 月 23 号各 IP 的访问次数,并按照次数降序排序。你的脚本应该输出:
5 192.168.1.22
4 192.168.1.21
3 192.168.1.20
2 192.168.1.25
1 192.168.1.24

输入描述:
1


输出描述:
2
示例1

输入

192.168.1.20 - - [21/Apr/2020:14:27:49 +0800] "GET /1/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.21 - - [21/Apr/2020:15:27:49 +0800] "GET /2/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.22 - - [21/Apr/2020:21:27:49 +0800] "GET /3/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.23 - - [21/Apr/2020:22:27:49 +0800] "GET /1/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.24 - - [22/Apr/2020:15:27:49 +0800] "GET /2/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.25 - - [22/Apr/2020:15:26:49 +0800] "GET /3/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.20 - - [23/Apr/2020:08:27:49 +0800] "GET /1/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.21 - - [23/Apr/2020:09:20:49 +0800] "GET /1/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.22 - - [23/Apr/2020:10:27:49 +0800] "GET /1/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.22 - - [23/Apr/2020:10:27:49 +0800] "GET /1/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.20 - - [23/Apr/2020:14:27:49 +0800] "GET /1/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.21 - - [23/Apr/2020:15:27:49 +0800] "GET /2/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.22 - - [23/Apr/2020:15:27:49 +0800] "GET /3/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.25 - - [23/Apr/2020:16:27:49 +0800] "GET /1/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.24 - - [23/Apr/2020:20:27:49 +0800] "GET /2/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.25 - - [23/Apr/2020:20:27:49 +0800] "GET /3/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.20 - - [23/Apr/2020:20:27:49 +0800] "GET /1/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.21 - - [23/Apr/2020:20:27:49 +0800] "GET /1/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.22 - - [23/Apr/2020:20:27:49 +0800] "GET /1/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.22 - - [23/Apr/2020:15:27:49 +0800] "GET /1/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
192.168.1.21 - - [23/Apr/2020:20:27:49 +0800] "GET /1/index.php HTTP/1.1" 404 490 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"

输出

5 192.168.1.22
4 192.168.1.21
3 192.168.1.20
2 192.168.1.25
1 192.168.1.24
# Count hits per client IP for 23 Apr 2020 (log read from stdin), highest count first.
awk '
    # $4 looks like "[23/Apr/2020:08:27:49"; characters 2-12 hold the date.
    substr($4, 2, 11) == "23/Apr/2020" { hits[$1]++ }
    END {
        for (ip in hits)
            print hits[ip] " " ip
    }
' | sort -nr -k1

发表于 2021-11-27 00:49:49 回复(0)
希尔排序
# Count requests per client IP for 23 Apr 2020 in nowcoder.txt and print
# "<count> <ip>" lines ordered by count, highest first, using a hand-written
# Shell sort instead of sort(1).
declare -A map
# read -r keeps backslashes literal; the "|| [[ -n $ip ]]" clause still processes
# a final line that has no trailing newline.
while read -r ip _ _ stamp _ || [[ -n $ip ]]
    do
        # Field 4 looks like "[23/Apr/2020:08:27:49": match day, month AND year.
        [[ $stamp == "[23/Apr/2020:"* ]] && map[$ip]=$(( ${map[$ip]:-0} + 1 ))
    done < nowcoder.txt

# ShellSort: copy the counts of map into the global array tmp and the matching
# IPs into the global array ips, then sort both arrays together by descending
# count. Sets the global q to the number of entries.
# Keeping the IP next to its count avoids looking IPs up by count afterwards,
# which would print tied IPs more than once.
function ShellSort(){
    local key gap i a temp temp_ip
    ips=("${!map[@]}")
    tmp=()
    for key in "${ips[@]}"	# copy the values of map into tmp, in the same order as ips
        do
            tmp+=("${map[$key]}")
        done
    q=${#tmp[@]}
    for (( gap=q>>1; gap>0; gap>>=1 ))	# Shell sort over tmp, moving ips in lockstep
        do
            for (( i=gap; i<q; i++ ))
                do
                    temp=${tmp[i]}
                    temp_ip=${ips[i]}
                    # && short-circuits, so tmp[a] is never read with a negative index.
                    for (( a=i-gap; a>=0 && temp>tmp[a]; a-=gap ))
                        do
                            tmp[a+gap]=${tmp[a]}
                            ips[a+gap]=${ips[a]}
                        done
                    tmp[a+gap]=$temp
                    ips[a+gap]=$temp_ip
                done
        done
}
ShellSort
for (( i=0; i<q; i++ ))
    do
        # Fixed format string: the data is never interpreted as printf directives.
        printf '%s %s\n' "${tmp[i]}" "${ips[i]}"
    done


发表于 2021-12-03 17:10:42 回复(0)
用awk查出2020年4月23号的访问ip次数然后交给sort进行排序
# Tally requests per client IP for 23 Apr 2020, then order by count, highest first.
# sort -n compares the counts as numbers; a lexical "sort -r" would place 9 ahead of 10.
awk '/\[23\/Apr\/2020/{ip[$1]++}END{for(i in ip){print ip[i]" "i}}' nowcoder.txt | sort -nr
发表于 2021-11-23 14:51:13 回复(0)
# Method 1:
# grep keeps the 23 Apr 2020 lines, awk extracts the client IP, "sort | uniq -c" counts
# each IP, and sort -nr orders the counts numerically (a lexical sort is only correct
# while all counts have the same width). The last awk strips the padding added by uniq -c.
grep '23/Apr/2020' nowcoder.txt | awk '{print $1}' | sort | uniq -c | sort -nr | awk '{print $1" "$2}'

# Method 2:
# awk tallies requests per client IP for lines dated 23/Apr/2020 and prints
# "<count> <ip>"; sort -nr then orders the lines by numeric count, highest first
# (a lexical sort would rank 9 above 10).
awk '
# $4 looks like "[23/Apr/2020:08:27:49"; characters 2-12 hold the date.
substr($4, 2, 11) == "23/Apr/2020" { dict[$1]++ }
END {
    for (each_ip in dict)
        printf("%d %s\n", dict[each_ip], each_ip)
}' nowcoder.txt | sort -nr

发表于 2023-03-21 22:10:35 回复(0)
#!/usr/bin/env bash
# Per-IP request counts for 23 Apr 2020, largest count first.
awk '/23\/Apr\/2020/ { hits[$1]++ } END { for (ip in hits) print hits[ip], ip }' nowcoder.txt | sort -k1nr
发表于 2022-03-25 13:43:38 回复(0)
# Reads the log from stdin and prints "<count> <ip>" for 23 Apr 2020, highest count first.
# cut -d ' ' -f 1 takes the whole first field, so addresses of any length stay intact
# (a fixed 12-character cut only fits addresses that are exactly 12 characters long);
# sort -nr compares the counts numerically.
grep "23/Apr/2020" | cut -d ' ' -f 1 | sort | uniq -c | sort -nr | sed 's/^ *//'
发表于 2022-02-19 12:39:46 回复(1)
# Count requests per client IP on 23 Apr 2020 and list them by descending count.
awk '
$0 ~ /23\/Apr\/2020/ { res[$1]++ }
END {
    for (addr in res)
        print res[addr] " " addr
}' nowcoder.txt | sort -nrk1

 grep "23/Apr" nowcoder.txt | awk '{print $1}' | sort | uniq -c | sort -nrk1|awk '{print $1" "$2}'

发表于 2021-12-11 09:39:06 回复(0)
# Count requests per client IP for 23 Apr 2020, highest count first.
# The sort key is the count column (field 1). It has no options of its own, so it
# inherits the global -n and -r and is compared numerically in descending order.
grep -w '23/Apr/2020' nowcoder.txt | cut -d ' ' -f 1 | sort | uniq -c | sort -nr -k1,1 | awk '{print $1,$2}'
发表于 2025-07-12 15:03:17 回复(0)
#!/bin/bash

# Count requests per client IP for 23 Apr 2020, highest count first.
# Both n and r are attached to the key: a key that carries its own ordering option
# does not inherit global options, so a separate global -n would be ignored for it.
sed -n '/23\/Apr\/2020/p' nowcoder.txt | cut -d' ' -f1 | sort | uniq -c | sort -k1,1nr | awk '{print $1,$2}'

发表于 2024-09-26 18:16:22 回复(0)
# Usage: script [logfile]   (logfile defaults to nowcoder.txt when no argument is given)
# Prints "<count> <ip>" for 23 Apr 2020; sort -nr orders the counts numerically, highest first.
grep "23/Apr/2020" "${1:-nowcoder.txt}" | gawk '{print $1}' | sort | uniq -c | sort -nr | gawk '{print $1,$2}'
发表于 2024-09-10 16:28:55 回复(0)
# Per-IP hit counts for 23 Apr 2020, highest count first.
grep '23/Apr/2020' nowcoder.txt | awk '{print $1}' | sort -n | uniq -c | sort -nr | awk '{print $1,$2}'
发表于 2024-07-29 16:43:37 回复(0)
file="nowcoder.txt"

# Keep the 23 Apr 2020 lines, take the text before " - - " (the client IP),
# count each IP and print "<count> <ip>" by descending count.
grep '23/Apr/2020' "$file" | awk -F ' - - ' '{print $1}' | sort -n | uniq -c | sort -rn | awk '{print $1" "$2}'

发表于 2024-07-04 12:21:33 回复(0)
# Read the log file named in the task (nowcoder.txt) and strip the left padding that
# uniq -c adds, so every output line is exactly "<count> <ip>".
grep '23/Apr/2020' nowcoder.txt | awk '{print $1}' | sort | uniq -c | sort -k1rn | awk '{print $1,$2}'
发表于 2024-06-21 09:49:37 回复(0)
awk '
# Print "count key" for every entry of arr, largest count first.
# Only arr is supplied by the caller; the remaining parameters serve as local variables.
# Requires gawk, because asorti is a gawk extension.
function bubble_sort(arr, sorted_arr, i, j, temp, n) {
    # sorted_arr[1..n] receives the keys of arr in sorted order; n is the key count.
    n = asorti(arr, sorted_arr)

    # Bubble the keys until their counts are in descending order.
    # Each pass is one comparison shorter than the previous one.
    for (i = n; i > 1; i--) {
        for (j = 1; j < i; j++) {
            if (arr[sorted_arr[j]] < arr[sorted_arr[j+1]]) {
                temp = sorted_arr[j+1]
                sorted_arr[j+1] = sorted_arr[j]
                sorted_arr[j] = temp
            }
        }
    }

    i = 1
    while (i <= n) {
        print arr[sorted_arr[i]],sorted_arr[i]
        i++
    }
}

# Split on " -" so that $1 is the client IP.
BEGIN { FS = " -" }

# Count every line dated 23 Apr 2020 under its IP (a missing entry starts from zero).
/23\/Apr\/2020/ { arr[$1]++ }

# Sort and print once all input has been read.
END { bubble_sort(arr) }
'

发表于 2024-06-20 22:10:27 回复(0)
# Reads the log from stdin and prints "<count> <ip>" for 23 Apr 2020, highest count first.
# Default awk field splitting yields the bare IP (splitting on "- -" leaves a trailing blank),
# sort -nr orders the counts numerically, and the last awk removes the uniq -c padding.
grep "23/Apr/2020" | awk '{print $1}' | sort | uniq -c | sort -nr | awk '{print $1,$2}'
发表于 2024-06-16 20:44:36 回复(0)
# Count requests per client IP for 23 Apr 2020; sort -nr orders the counts numerically
# (highest first) and sed strips the left padding added by uniq -c.
grep "23/Apr/2020" nowcoder.txt |awk -F " " '{print $1}' |sort|uniq -c |sort -nr |sed 's/^ *//'
发表于 2024-06-07 16:29:44 回复(0)
为什么这个不行
# NOTE(review): uniq -c left-pads each count with blanks, so these output lines begin with
# spaces instead of matching the expected "5 192.168.1.22" form - presumably the reason
# the judge rejects this version; confirm against the judge's comparison rules.
grep "23/Apr/2020" nowcoder.txt | awk '{print$1}' | sort | uniq -c | sort -r

必须这样才可以
# The final awk re-prints count and IP separated by one space, dropping the padding that
# uniq -c puts in front of each count; sort -nr orders the counts numerically, highest first.
grep "23/Apr/2020" nowcoder.txt | awk '{print$1}' | sort | uniq -c | sort -nr | awk '{print$1,$2}'



发表于 2024-05-07 17:50:21 回复(0)
#!/bin/bash
# Per-IP request counts for 23 Apr 2020, printed as "<count> <ip>" with the highest count first.
grep "23/Apr/2020" nowcoder.txt | awk '{print $1}' | sort | uniq -c | sort -rn | awk '{print $1,$2}'
发表于 2024-02-27 09:50:39 回复(0)
# Emit the client IP of every 23 Apr 2020 line, count the IPs, and list them by descending count.
awk '/23\/Apr\/2020/ {print $1}' nowcoder.txt | sort | uniq -c | sort -rn | awk '{print $1,$2}'
编辑于 2024-01-23 18:15:03 回复(0)
# Count requests per client IP for 23 Apr 2020, highest count first.
# printf is given a fixed format string so the data is never interpreted as format directives.
grep 23/Apr/2020 nowcoder.txt | awk -F" - - " '{map[$1]++}END{for(j in map) printf "%s %s\n", map[j], j}' | sort -nk1r
发表于 2023-09-22 17:37:10 回复(0)