shelltool/pixiv/pixiv.sh

#!/bin/bash

# Path to the JSON configuration file (first positional argument, required).
config_file=$1
if [ -z "$config_file" ]
then
   # The required argument is the config file, so the message names it.
   echo 'The config file must be specified'
   exit 1
fi
if [ ! -f "$config_file" ]
then
        # Double quotes so that $config_file is actually expanded in the message.
        echo "config file：$config_file not found"
        exit 1
fi
# `command -v` replaces `[ ! -f $(which tool) ]`: when the tool is missing,
# `which` prints nothing and the test degenerates to `[ ! -f ]`, which is
# false, so the missing dependency was never reported.
if ! command -v jq >/dev/null 2>&1
then
        echo 'To run this script, you need to install jq，https://stedolan.github.io/jq/'
        exit 1
else
        echo "jq version:`jq --version`"
fi
if ! command -v xmlstarlet >/dev/null 2>&1
then
        echo 'To run this script, you need to install xmlstarlet，http://xmlstar.sourceforge.net/docs.php'
        exit 1
else
        echo "xmlstarlet version:`xmlstarlet --version`"
fi


# ==================== Config ====================
# All values are read from the JSON config file with `jq -r`; a key that is
# absent from the file comes back as the literal string "null".
# $config_file is quoted everywhere so that a path containing whitespace works.

# Aliyun OSS endpoint, bucket and credentials used to sign requests.
Host=$(jq -r .aliyun_oss.Host "$config_file")
accelerateHost=$(jq -r .aliyun_oss.accelerateHost "$config_file")
bucketname=$(jq -r .aliyun_oss.bucketname "$config_file")
AccessKeyId=$(jq -r .aliyun_oss.AccessKeyId "$config_file")
AccessKeySecret=$(jq -r .aliyun_oss.AccessKeySecret "$config_file")
# File listing object names already present in the bucket, and its companion
# file that collects the names of failed uploads.
db_file=$(jq -r .aliyun_oss.db "$config_file")
db_file_err=$db_file.err
# Suffix appended to object URLs when the preview image is downloaded.
imageParam=$(jq -r .aliyun_oss.imageParam "$config_file")
# Relative age passed to `date -d "-$backup_time"`; older objects get deleted.
backup_time=$(jq -r .aliyun_oss.backup_time "$config_file")
# Object key of the placeholder image; it is never deleted from the bucket.
error_file=$(jq -r .aliyun_oss.error_file "$config_file")

convertio_apikey=$(jq -r .convertio.apikey "$config_file")

CronitorKey=$(jq -r .Cronitor.API_KEY "$config_file")
CronitorJobName=$(jq -r .Cronitor.JOB_NAME "$config_file")
# ================================================

# Make sure the db file exists before anything tries to read it.
if [ ! -f "$db_file" ]
then
   touch "$db_file"
fi

# Upload a local file to the OSS bucket as a public-read object.
# Globals (read): bucketname, Host, AccessKeyId, AccessKeySecret, db_file_err
# Arguments:      $1 - path of the local file; it is also used as the object key
# Side effects:   on HTTP 200 the local file is removed; on any other status
#                 its name is appended to $db_file_err.
function PutObject(){
        local object=$1
        local verb="PUT"
        local content_md5=""
        local acl_header="x-oss-object-acl:public-read"
        local resource="/$bucketname/$object"
        local content_type request_date signature authorization http_code

        content_type=$(file -b --mime-type "$object")
        # LC_ALL=C guarantees English day/month names regardless of the caller's
        # locale (LANG alone is overridden by LC_ALL when that is set).
        request_date=$(LC_ALL=C TZ=GMT date +'%a, %d %b %Y %H:%M:%S GMT')
        # String to sign: VERB, Content-MD5, Content-Type, Date, canonicalized
        # OSS headers, canonicalized resource - joined by newlines. printf with
        # quoted arguments keeps every field byte-exact.
        signature=$(printf '%s\n%s\n%s\n%s\n%s\n%s' "$verb" "$content_md5" "$content_type" "$request_date" "$acl_header" "$resource" \
                | openssl sha1 -hmac "$AccessKeySecret" -binary | base64)
        authorization="OSS $AccessKeyId:$signature"
        # -o /dev/null discards the response body so that only the status code
        # written by -w ends up in http_code.
        http_code=$(curl -v -o /dev/null -w "%{http_code}" -X "$verb" -H "HOST:$bucketname.$Host" -H "$acl_header" -H "Date:$request_date" -H "Content-Type:$content_type" -H "Authorization:$authorization" --data-binary "@$object" "https://$bucketname.$Host/$object")
        if [[ "$http_code" == "200" ]]
        then
           rm -f -- "$object"
        else
           echo "$object" >>"$db_file_err"
        fi
}

# Request the bucket listing (`?list-type=2`) and write the raw response to
# stdout. A single request is made; no continuation token is handled here.
# Globals (read): bucketname, Host, AccessKeyId, AccessKeySecret
function GetBucketV2(){
   local verb="GET"
   local resource="/$bucketname/"
   local request_date signature

   # LC_ALL=C guarantees English day/month names regardless of the caller's
   # locale (LANG alone is overridden by LC_ALL when that is set).
   request_date=$(LC_ALL=C TZ=GMT date +'%a, %d %b %Y %H:%M:%S GMT')
   # String to sign: VERB, empty Content-MD5, empty Content-Type, Date and the
   # canonicalized resource (there are no OSS headers), joined by newlines.
   signature=$(printf '%s\n\n\n%s\n%s' "$verb" "$request_date" "$resource" \
      | openssl sha1 -hmac "$AccessKeySecret" -binary | base64)
   curl -v -H "HOST:$bucketname.$Host" -H "Date:$request_date" -H "Authorization:OSS $AccessKeyId:$signature" "https://$bucketname.$Host/?list-type=2"
}

# Delete every object older than $backup_time from the bucket (except
# $error_file) with one multi-object delete request and, when at least one
# object was selected, rebuild $db_file from a fresh bucket listing.
# Globals (read): backup_time, error_file, bucketname, Host, AccessKeyId,
#                 AccessKeySecret, db_file
# Calls:          GetBucketV2
# Returns:        1 if $backup_time cannot be turned into a date, otherwise the
#                 status of the last command executed.
function DeleteMultipleObjects(){
      local listing cutoff key modified modified_ts
      local object_nodes=""
      local payload verb content_md5 content_type request_date resource
      local string_to_sign signature authorization http_code

      # One "<key> <LastModified>" pair per line. Kept in a variable instead of
      # a fixed-name temp file in the working directory.
      listing=$(GetBucketV2|xmlstarlet select -t -m '/ListBucketResult//Contents' -v 'Key' -o ' ' -v 'LastModified' -n)
      # Timestamp of the moment $backup_time ago; objects modified before it expire.
      if ! cutoff=$(date -d "-$backup_time" +%s)
      then
         echo "invalid backup_time: $backup_time" >&2
         return 1
      fi
      while read -r key modified _
      do
        [[ -n $key ]] || continue
        # Skip entries whose date cannot be parsed: an empty timestamp would be
        # evaluated as 0 and wrongly mark the object as expired.
        modified_ts=$(date -d "$modified" +%s 2>/dev/null) || continue
        if [[ $key != "$error_file" && $modified_ts -lt $cutoff ]]
        then
          object_nodes="${object_nodes}<Object><Key>${key}</Key></Object>"
        fi
      done <<< "$listing"

      if [ -n "$object_nodes" ]
      then
         echo "存在${backup_time}以前的图片"
         payload='<?xml version="1.0" encoding="UTF-8"?><Delete><Quiet>true</Quiet>'"${object_nodes}"'</Delete>'
         printf '%s\n' "$payload"|xmlstarlet format
         verb="POST"
         # The MD5 is taken over exactly the bytes that are sent with -d below.
         content_md5=$(printf '%s' "$payload"|openssl dgst -md5 -binary|base64)
         content_type="application/xml"
         # LC_ALL=C guarantees English day/month names regardless of locale.
         request_date=$(LC_ALL=C TZ=GMT date +'%a, %d %b %Y %H:%M:%S GMT')
         resource="/$bucketname/?delete"
         # Fields are separated by a literal "\n" that printf '%b' turns into
         # newlines when signing; the log line shows the unexpanded form.
         string_to_sign="$verb\n$content_md5\n$content_type\n$request_date\n$resource"
         echo "sign:$string_to_sign"
         signature=$(printf '%b' "$string_to_sign" | openssl sha1 -hmac "$AccessKeySecret" -binary | base64)
         authorization="OSS $AccessKeyId:$signature"
         # -o /dev/null discards the response body so that only the status code
         # written by -w ends up in http_code.
         http_code=$(curl -X "$verb" -v -o /dev/null -w "%{http_code}"  -H "HOST:$bucketname.$Host" -H "Date:$request_date" -H "Content-Type:$content_type" -H "Content-MD5:$content_md5" -H "Authorization:$authorization" -d "$payload" "https://$bucketname.$Host/?delete")
         if [[ "$http_code" == "200" ]]
         then
            echo "${backup_time}以前的图片删除成功"
         else
            echo "${backup_time}以前的图片删除失败"
         fi
         # Rebuild the db file from what is in the bucket now.
         : >"$db_file"
         GetBucketV2|xmlstarlet select -t -m '/ListBucketResult//Contents' -v 'Key' -n|while read -r key
         do
               if [[ -n $key && $key != "$error_file" ]]
               then
                  echo "$key" >> "$db_file"
               fi
         done
      else
        echo "没有${backup_time}以前的图片需要删除"
      fi
}

# Print a byte count in human-readable form on stdout.
# Sizes above 1048576 bytes are printed in MB with two decimals (computed by
# bc); everything else is printed as whole KB (integer division by 1024).
# Arguments: $1 - size in bytes
function fileSizeStr(){
     fileSize=$1
     if (( fileSize > 1048576 ))
     then
        printf '%sMB\n' "$(bc <<< "scale=2;$fileSize/1048576")"
     else
        printf '%sKB\n' "$(( fileSize / 1024 ))"
     fi
}

# Run a command line whose JSON output is inspected for the `ok`,
# `error_code` and `parameters.retry_after` fields, repeating it for as long as
# the answer carries error_code 429.
# Arguments: $1 - complete command line, executed with `bash -c`
# Outputs:   the command, its response and retry/error notices on stdout
# Returns:   1 when the response contains "ok": true, 0 on any other outcome.
#            NOTE: this is the reverse of the usual shell convention; the
#            caller in this file tests `$? -eq 1` to detect success.
function Request(){
   local result ok error_code second
   while true
   do
      echo "发送消息命令：$1"
      result=$(bash -c "$1")
      echo "请求响应：$result"
      ok=$(printf '%s\n' "$result"|jq .ok)
      if [[ "$ok" == true ]]
      then
         return 1
      fi
      # String comparison: an empty or non-numeric value simply does not match
      # instead of making the test itself fail.
      error_code=$(printf '%s\n' "$result"|jq .error_code)
      if [[ "$error_code" == 429 ]]
      then
         second=$(printf '%s\n' "$result"|jq .parameters.retry_after)
         # Fall back to a short pause when no usable delay was returned.
         [[ "$second" =~ ^[0-9]+$ ]] || second=1
         sleep "$second"
         echo "${second}后再发起请求"
         # The enclosing loop re-sends the very same command. The former
         # recursive `Request $1` passed only the first word of the command
         # (unquoted expansion) and therefore always failed before the loop
         # retried anyway.
         continue
      else
         echo "执行命令遇到未知错误！"
         return 0
      fi
   done
}

# Working directory in which pages, metadata and images are cached.
basePath=$(jq -r .basePath "$config_file")

# Remaining settings are read BEFORE changing directory so that a relative
# $config_file path still resolves.
telegramToken=$(jq -r .telegramToken "$config_file")
baseApi="https://api.telegram.org/bot$telegramToken"
# Read without -r on purpose: a string chat id keeps its JSON double quotes,
# which are consumed later when the value is embedded in a `bash -c` command.
chat_id=$(jq .chatId "$config_file")
mode=$(jq -r .mode "$config_file")
content=$(jq -r .content "$config_file")
rank_url="https://www.pixiv.net/ranking.php?mode=$mode&content=$content&p=1&format=json"
today=`date "+%Y-%m-%d"`
_today=`date "+%Y%m%d"`
rank_json=$today.json

# sed pipeline (run through `bash -c`) that decodes &amp; &lt; &gt; entities.
rule='sed -e "s/\\&amp;/\\&/g"|sed -e "s/\\&lt;/\\</g"|sed -e "s/\\&gt;/\\>/g"'

# -d, not -f: basePath is a directory.
if [ ! -d "$basePath" ]
then
        mkdir -p "$basePath"
fi
# Stop if the directory cannot be entered: the end of the script deletes old
# files relative to the current directory.
cd "$basePath" || exit 1

# Announce that processing of today's ranking has started.
Request "curl -v -d chat_id=$chat_id -d parse_mode=HTML  -d text=\"Pixiv排行榜已更新，开始处理<a href='https://www.pixiv.net/ranking.php?mode%3D$mode%26content%3D$content'>$today日榜</a>数据。#date$_today #日期$_today %0A%0A<strong>排名是什么？</strong>%0A排名是以pixiv上所有公开作品为对象的统计以及排名。%0A毎日0:00～23时59分59秒的阅览树・「赞！」数等为排名的依据，期结果由pixiv独自的算法「pixiv rank β」决定。统计结果于每日中午12:00公开。%0A<a href='https://www.pixiv.help/hc/zh-cn/categories/360001065093-%E6%9C%89%E5%85%B3%E6%8E%92%E8%A1%8C%E6%A6%9C'>有关排行榜</a>\" $baseApi/sendMessage"

# Download the ranking once per day; an existing file is reused.
if [ ! -f "$rank_json" ]
then
        echo "get data from $rank_url"
        curl -v "$rank_url" >"$rank_json"
fi
# Number of ranking entries. The filter prints nothing unless .contents is an
# array, so an empty download or an error document is detected below.
length=$(jq '.contents | select(type == "array") | length' "$rank_json")
if ! [[ "$length" =~ ^[0-9]+$ ]]
then
        # Drop the unusable file so that the next run downloads it again, and
        # process zero entries instead of handing an empty value to seq.
        echo "invalid ranking data in $rank_json, discarding it" >&2
        rm -f -- "$rank_json"
        length=0
fi

# State accumulated while walking the ranking.
fileList=''            # "-F name=@file" arguments for the pending media group
media=''               # JSON entries (each prefixed with ",") for the pending media group
fileCount=0            # images queued in the pending media group
maxFileCount=10        # group size at which the media group is sent
tarFile=$today.tar.gz
maxFileSize=20971520   # bytes; larger originals go through online conversion
maxFileSize_M="$((maxFileSize/1024/1024))M"
start_rank=''          # first rank of the current group of ten
end_rank=''            # last rank of the current group of ten
maxLikeCount=0
maxLikeCountPid=0
maxBookmarkCount=0
maxBookmarkCountPid=0
maxViewCount=0
maxViewCountPid=0
maxRatio=10            # long-side/short-side ratio above which a preview is replaced
# Walk every ranking entry: fetch its metadata, upload each page image to OSS,
# download a preview of each page, and send the first page of every artwork to
# Telegram in media groups of $maxFileCount.
# NOTE(review): a media group is only sent when fileCount reaches
# maxFileCount; entries left over after the last full group are not sent by
# this loop.
for index in `seq 1 $length`
do
        index=$((index-1))
        pid=`jq  --argjson index $index '.contents[$index].illust_id' $rank_json`
        artworkLink="https://www.pixiv.net/artworks/$pid"
        rank=`jq  --argjson index $index '.contents[$index].rank' $rank_json`
        yes_rank=`jq  --argjson index $index '.contents[$index].yes_rank' $rank_json`

        # Remember the first and last rank of each block of ten entries.
        if [ $(((index+1) % 10)) == 1 ]
        then
           start_rank=$rank
        fi
        if [ $(((index+1) % 10)) == 0 ]
        then
           end_rank=$rank
        fi
        # yes_rank 0 is reported as a first appearance.
        if [ $yes_rank -eq 0 ]
        then
          rank_info="\#排名$rank \#rank$rank \#首次登场"
        else
          rank_info="\#排名$rank \#rank$rank 之前 \#排名$yes_rank \#rank$yes_rank"
        fi
        echo "pid=$pid,artworkLink=$artworkLink,rank_info=$rank_info"
        # The artwork page is cached as <pid>.html and the JSON embedded in it
        # is extracted once into <pid>.json.
        png_html_file=$pid.html
        if [ ! -f $png_html_file ]
        then
                echo "get data from $artworkLink"
                curl -v $artworkLink >$png_html_file
        fi
        json_file=$pid.json
        if [ ! -f $json_file ]
        then
                egrep -o "content='{\"timestamp.*" $png_html_file|sed -e "s/content='//"|sed -e "s/..$//" >$json_file
        fi
        pageCount=`jq --arg pid $pid '.illust[$pid].pageCount' $json_file`
        original_url=`jq -r --arg pid $pid '.illust[$pid].urls.original' $json_file`
        small_url=`jq -r --arg pid $pid '.illust[$pid].urls.small' $json_file`
        # NOTE(review): title, userName and tag come from the downloaded page and
        # are interpolated into a `bash -c` command string below; a single quote
        # inside such a value changes the command that is executed. Left as is
        # because the quoting of the later Request command relies on the way
        # this step treats quotes.
        title=`jq -r --arg pid $pid '.illust[$pid].title' $json_file|sed -e 's/\"/\\\"/g'|sed -e 's/&lt;//g'|sed -e 's/&gt;//g'`
        title=`bash -c "echo '$title'|$rule"`
        description=`jq -r --arg pid $pid '.illust[$pid].description' $json_file`
        userName=`jq -r --arg pid $pid '.illust[$pid].userName' $json_file|sed -e 's/\"/\\\"/g'`
        userName=`bash -c "echo '$userName'|$rule"`
        userId=`jq -r --arg pid $pid '.illust[$pid].userId' $json_file`
        # Track the entry with the highest like / bookmark / view count.
        likeCount=`jq --arg pid $pid '.illust[$pid].likeCount' $json_file`
        if [ $likeCount -gt $maxLikeCount ]
        then
           maxLikeCount=$likeCount
           maxLikeCountPid=$pid
        fi
        bookmarkCount=`jq --arg pid $pid '.illust[$pid].bookmarkCount' $json_file`
        if [ $bookmarkCount -gt $maxBookmarkCount ]
        then
           # Store the bookmark count itself (was mistakenly $likeCount).
           maxBookmarkCount=$bookmarkCount
           maxBookmarkCountPid=$pid
        fi
        viewCount=`jq --arg pid $pid '.illust[$pid].viewCount' $json_file`
        if [ $viewCount -gt $maxViewCount ]
        then
           # Store the view count itself (was mistakenly $likeCount).
           maxViewCount=$viewCount
           maxViewCountPid=$pid
        fi
        # Tags become one space-separated line, each prefixed with "\#".
        tag=`jq -r  --arg pid $pid '.illust[$pid].tags.tags[].tag' $json_file|sed -e 's/^/\\#/g'|sed ':a;N;s/\n/ /;t a;'`
        tag=`bash -c "echo '$tag'|$rule"`

        echo -e "pageCount=$pageCount,original_url=$original_url,small_url=$small_url\n\
        title=$title,description=$description,userName=$userName\n\
        likeCount=$likeCount,bookmarkCount=$bookmarkCount,viewCount=$viewCount\n\
        tag=$tag"

        for page in `seq 1 $pageCount`
        do
                page=$((page - 1))
                # Per-page URLs are derived by replacing "p0" with "p<page>".
                page_original_url=`echo $original_url | sed -e "s/p0/p$page/"`
                page_small_url=`echo $small_url | sed -e"s/p0/p$page/"`
                original_file_name=`echo $page_original_url | egrep -o "$pid.*"`
                webp_file_name=`echo $original_file_name|sed 's/jpg/webp/'|sed 's/png/webp/'`

                # Upload only when the db file has no line equal to the file name.
                # -x/-F match the whole line literally (the name contains "."),
                # and -q makes the result independent of duplicate lines.
                if ! grep -qxF -- "$original_file_name" "$db_file"
                then
                        echo "download image file name=$original_file_name,url=$page_original_url"
                        if [ ! -f $original_file_name ]
                        then
                                curl -v -H 'referer: https://www.pixiv.net/' $page_original_url -o $original_file_name
                        fi
                        original_file_size=`du -b  $original_file_name|awk '{print $1}'`
                        if [ $original_file_size -gt $maxFileSize ]
                        then
                          # Too large: convert to webp through convertio (start a
                          # conversion, upload the file, poll every 10s), then
                          # upload the converted file.
                          echo "${original_file_name}文件体积：${original_file_size}字节超过${maxFileSize}字节限制，需要在线压缩"
                          result=`curl -v -X POST -d "{\"apikey\": \"$convertio_apikey\", \"input\":\"upload\", \"outputformat\":\"webp\"}" http://api.convertio.co/convert`

                          if [ `echo $result|jq -r .status` = ok ]
                          then
                             id=`echo $result|jq -r .data.id`
                             result=`curl -v -X PUT --upload-file $original_file_name  http://api.convertio.co/convert/$id/$original_file_name`
                             if [ `echo $result|jq -r .status` = ok ]
                             then
                                while true
                                do
                                   sleep 10
                                   result=`curl -v -X GET http://api.convertio.co/convert/$id/status`
                                   echo "转换响应：$result"
                                   if [[ `echo $result|jq -r .status` = ok && `echo $result|jq -r .data.step` = finish ]]
                                   then
                                       echo $result|jq .data.output.url|xargs curl -v -o $webp_file_name
                                       PutObject $webp_file_name
                                       break
                                   elif [ `echo $result|jq -r .status` = error ]
                                   then
                                       break
                                   else
                                       echo "10s后重新获取转换结果"
                                   fi
                                done
                             else
                                echo "Direct File Upload For Conversion error!!!"
                             fi
                          else
                              echo "Start a New Conversion error!!!"
                          fi
                        else
                          PutObject $original_file_name
                        fi

                fi
                # Fetch the preview from OSS (object URL plus $imageParam) unless a
                # local webp file already exists.
                if [ ! -f $webp_file_name ]
                then
                   webp_url="https://$bucketname.$accelerateHost/$original_file_name$imageParam"
                   echo "download image file name=$webp_file_name,url=$webp_url"
                   curl -v $webp_url -o $webp_file_name
                   # Long side divided by short side, parsed from the 9th field
                   # of the `file` output.
                   image_ratio=`file $webp_file_name|awk '{print $9}'|sed -e 's/,//'|sed '/^$/d'|awk '{split($0,a,"x");f=a[1]>a[2]?a[1]/a[2]:a[2]/a[1];print f}'`
                   if [[ -n $image_ratio && `echo "$image_ratio > $maxRatio"|bc` -eq 1 ]]
                   then
                     # Extreme aspect ratio: use the placeholder image with a
                     # text watermark naming the pid instead.
                     echo "图片分辨率`file $webp_file_name|awk '{print $9}'|sed -e 's/,//'`比率：$image_ratio>$maxRatio,视为异常，返回异常图片"
                     watermark=`echo "画作pid：$pid"|base64`
                     url="https://$bucketname.$accelerateHost/$error_file?x-oss-process=image/watermark,text_${watermark},g_north/watermark,text_5YiG6L6o546H5q%2BU5L6L5byC5bi4Cg==,g_center/watermark,text_5bi46KeB5LqO6LaF6ZW_5Zu-,g_south"
                     echo "文字水印请求url：$url"
                     curl -v $url -o $webp_file_name
                   fi
                fi

                # Only the first page of an artwork is queued for Telegram.
                if [ $page -eq 0 ]
                then
                        info_file=$original_file_name.info
                        # The URL is quoted because it contains "?", a glob character.
                        curl -v --output "$info_file" "https://$bucketname.$Host/$original_file_name?x-oss-process=image/info"
                        FileSize=$(fileSizeStr $(cat $info_file|jq -r  .FileSize.value))
                        ImageHeight=$(cat $info_file|jq -r  .ImageHeight.value)
                        ImageWidth=$(cat $info_file|jq -r  .ImageWidth.value)
                        media="$media,{\"type\":\"photo\",\"media\":\"attach://$webp_file_name\",\"parse_mode\":\"HTML\",\"caption\":\"$rank_info\n<a href=\\\"$artworkLink\\\">$title</a>\n<a href=\\\"https://www.pixiv.net/users/$userId\\\">$userName</a>\n$tag\n原图分辨率：${ImageWidth}X${ImageHeight}(宽X高)，文件体积：$FileSize，<a href=\\\"https://$bucketname.$Host/$original_file_name\\\">预览原图</a>  <a href=\\\"https://$bucketname.$accelerateHost/$original_file_name\\\">加速下载原图</a>\"}"
                        fileList="$fileList -F $webp_file_name=@$webp_file_name"
                        fileCount=$((fileCount + 1))
                        fileSize=`du $webp_file_name | awk '{print $1}'`
                        echo "fileCount=$fileCount"
                fi
                if [[ $fileCount -eq $maxFileCount ]]
                then
                        # `cut -c 2-` drops the leading comma of $media. The
                        # unquoted echo is kept on purpose: it collapses any
                        # whitespace (including newlines) inside the values.
                        Request "curl -v -F  chat_id=$chat_id $fileList -F media='[`echo $media | cut -c 2-`]' $baseApi/sendMediaGroup"
                        # Request returns 1 on success.
                        if [ $? -eq 1 ]
                        then
                           # sleepImage is not defined anywhere in this file; default
                           # to 0 so that sleep is not called without an operand.
                           sleep "${sleepImage:-0}"
                           text="以上作品日榜排名分别是 #排名${start_rank}_${end_rank} #rank${start_rank}_${end_rank} ，点击作品可以查看pid/标题/画师/tag信息."
                        else
                           text="#排名${start_rank}_${end_rank} #rank${start_rank}_${end_rank}作品上传失败，请联系管理员"
                        fi

                        Request "curl -v -d chat_id=$chat_id -d text=\"$text\" $baseApi/sendMessage"

                        # Start a new, empty media group.
                        fileList=''
                        media=''
                        fileCount=0
                fi
        done
done


# Ask Cronitor for the monitor's next_expected_at value; it is passed to
# `date -d @...` below, i.e. treated as seconds since the epoch.
next_expected_at=`curl -v "https://cronitor.io/api/monitors/$CronitorJobName" -u "$CronitorKey:"|jq .next_expected_at`
Request "curl -v -d chat_id=$chat_id -d text=\"以上就是$today日榜前${length}名作品，本次推送完毕，下次推送时间预计是`date -d @$next_expected_at '+%Y-%m-%d %H:%M:%S'`，如有问题请联系管理员。 #date$_today  #日期$_today \" $baseApi/sendMessage"

# Remove cached pages, metadata, previews and image-info files older than 7
# days. Matching on the file-name suffix with -name (instead of grepping the
# whole path) cannot hit unrelated files whose path merely contains one of the
# words, and -exec handles names with whitespace.
find . -type f -mtime +7 \( -name '*.html' -o -name '*.json' -o -name '*.webp' -o -name '*.info' \) -exec rm -f -- {} +
# Finally expire old objects in the bucket.
DeleteMultipleObjects