shelltool/pixiv/pixiv.sh

#!/bin/bash

# Validate the command line and the required external tools before doing any
# work. Usage: pixiv.sh <config_file>
config_file=$1
if [ -z "$config_file" ]; then
   # The only argument this script takes is the JSON config file.
   echo 'The config file must be specified'
   exit 1
fi
if [ ! -f "$config_file" ]; then
   # Double quotes so the offending path is actually printed.
   echo "config file：$config_file not found"
   exit 1
fi
# `command -v` replaces `[ ! -f $(which tool) ]`: when the tool is missing,
# `which` prints nothing, the test collapses to `[ ! -f ]` (which is false) and
# the missing tool was never reported.
if ! command -v jq >/dev/null 2>&1; then
   echo 'To run this script, you need to install jq，https://stedolan.github.io/jq/'
   exit 1
else
   echo "jq version:$(jq --version)"
fi
if ! command -v xmlstarlet >/dev/null 2>&1; then
   echo 'To run this script, you need to install xmlstarlet，http://xmlstar.sourceforge.net/docs.php'
   exit 1
else
   echo "xmlstarlet version:$(xmlstarlet --version)"
fi

# ==================== Config ====================
# Make the config path absolute: the script changes into $basePath below and
# keeps reading $config_file afterwards, which would fail for a relative path.
case $config_file in
/*) ;;
*) config_file="$PWD/$config_file" ;;
esac

# Aliyun OSS endpoint hosts, bucket name and credentials.
Host=$(jq -r .aliyun_oss.Host "$config_file")
accelerateHost=$(jq -r .aliyun_oss.accelerateHost "$config_file")
bucketname=$(jq -r .aliyun_oss.bucketname "$config_file")
AccessKeyId=$(jq -r .aliyun_oss.AccessKeyId "$config_file")
AccessKeySecret=$(jq -r .aliyun_oss.AccessKeySecret "$config_file")
# db_file lists object names already in the bucket; failed uploads are logged
# next to it in "<db_file>.err".
db_file=$(jq -r .aliyun_oss.db "$config_file")
db_file_err=$db_file.err
# Suffix appended to an object URL when the *.webp copy is downloaded.
imageParam=$(jq -r .aliyun_oss.imageParam "$config_file")
# Retention in days, used for both local files and bucket objects.
backup_time=$(jq -r .aliyun_oss.backup_time "$config_file")
# Object key of the placeholder picture; it is never deleted from the bucket.
error_file=$(jq -r .aliyun_oss.error_file "$config_file")

convertio_apikey=$(jq -r .convertio.apikey "$config_file")

CronitorKey=$(jq -r .Cronitor.API_KEY "$config_file")
CronitorJobName=$(jq -r .Cronitor.JOB_NAME "$config_file")

basePath=$(jq -r .basePath "$config_file")
# jq -r prints "null" for a missing key; refuse to work in a directory
# literally called "null".
if [ -z "$basePath" ] || [ "$basePath" = "null" ]; then
   echo 'basePath is missing from the config file'
   exit 1
fi
mkdir -p "$basePath" || exit 1
# ================================================

# Everything below uses paths relative to $basePath. Carrying on after a failed
# cd would let the cleanup at the end of the script delete files elsewhere.
cd "$basePath" || exit 1
if [ ! -f "$db_file" ]; then
   touch "$db_file"
fi

# Upload one local file to the OSS bucket as a public-read object.
# Globals:   bucketname, Host, AccessKeyId, AccessKeySecret, db_file_err (read)
# Arguments: $1 - local file path; the same string is used as the object key
# Outputs:   curl's verbose trace on stderr; the response body on stderr when
#            the upload is rejected
# Effects:   on HTTP 200 the local file is deleted, otherwise its name is
#            appended to $db_file_err
function PutObject() {
   local file=$1
   local VERB="PUT"
   local Content_MD5=""
   local Content_Type Date CanonicalizedOSSHeaders CanonicalizedResource
   local stringToSign Signature Authorization response http_code
   local nl=$'\n'

   Content_Type=$(file -b --mime-type -- "$file")
   Date=$(TZ=GMT env LANG=en_US.UTF-8 date +'%a, %d %b %Y %H:%M:%S GMT')
   CanonicalizedOSSHeaders="x-oss-object-acl:public-read$nl"
   CanonicalizedResource="/$bucketname/$file"
   # Build the string with real newlines instead of relying on `echo -e`, so
   # backslashes inside the file name are signed literally.
   printf -v stringToSign '%s\n%s\n%s\n%s\n%s%s' \
      "$VERB" "$Content_MD5" "$Content_Type" "$Date" \
      "$CanonicalizedOSSHeaders" "$CanonicalizedResource"
   # NOTE(review): the secret is passed on openssl's command line and is
   # therefore visible in the process list while the command runs.
   Signature=$(printf '%s' "$stringToSign" | openssl sha1 -hmac "$AccessKeySecret" -binary | base64)
   Authorization="OSS $AccessKeyId:$Signature"
   # -w puts the status code on its own last line so it can be separated from
   # whatever body the server returns.
   response=$(curl -v -w '\n%{http_code}' -X "$VERB" \
      -H "HOST:$bucketname.$Host" \
      -H "x-oss-object-acl:public-read" \
      -H "Date:$Date" \
      -H "Content-Type:$Content_Type" \
      -H "Authorization:$Authorization" \
      --data-binary "@$file" "https://$bucketname.$Host/$file")
   http_code=${response##*$nl}
   if [ "$http_code" = "200" ]; then
      rm -f -- "$file"
   else
      printf '%s\n' "${response%$nl*}" >&2
      printf '%s\n' "$file" >>"$db_file_err"
   fi
}

# List the bucket's objects with a signed `GET /?list-type=2` request.
# Globals:   bucketname, Host, AccessKeyId, AccessKeySecret (read)
# Arguments: none
# Outputs:   the response body on stdout, curl's verbose trace on stderr
# NOTE(review): only a single response is fetched; no continuation token is
# followed, so a truncated listing would not be noticed here.
function GetBucketV2() {
   local VERB="GET"
   local Content_MD5=""
   local Content_Type=""
   local CanonicalizedOSSHeaders=""
   local CanonicalizedResource="/$bucketname/"
   local Date stringToSign Signature Authorization

   Date=$(TZ=GMT env LANG=en_US.UTF-8 date +'%a, %d %b %Y %H:%M:%S GMT')
   # Real newlines via printf rather than `echo -e` on an unquoted string.
   printf -v stringToSign '%s\n%s\n%s\n%s\n%s%s' \
      "$VERB" "$Content_MD5" "$Content_Type" "$Date" \
      "$CanonicalizedOSSHeaders" "$CanonicalizedResource"
   # NOTE(review): the secret is passed on openssl's command line and is
   # therefore visible in the process list while the command runs.
   Signature=$(printf '%s' "$stringToSign" | openssl sha1 -hmac "$AccessKeySecret" -binary | base64)
   Authorization="OSS $AccessKeyId:$Signature"
   curl -v \
      -H "HOST:$bucketname.$Host" \
      -H "Date:$Date" \
      -H "Authorization:$Authorization" \
      "https://$bucketname.$Host/?list-type=2"
}

# Delete every bucket object older than $backup_time days (except
# $error_file) with one signed `POST /?delete` request, then rebuild $db_file
# from a fresh bucket listing.
# Globals:   bucketname, Host, AccessKeyId, AccessKeySecret, backup_time,
#            error_file, db_file (read)
# Arguments: none
# Outputs:   progress messages on stdout, curl traces on stderr
# Returns:   1 if no temporary file could be created
function DeleteMultipleObjects() {
   local listing oldTimestamp line filename modified timestamp
   local ObjectNode=""
   local compress_xml VERB Content_MD5 Content_Type Date
   local CanonicalizedOSSHeaders CanonicalizedResource
   local stringToSign Signature Authorization response http_code
   local nl=$'\n'

   listing=$(mktemp) || return 1
   GetBucketV2 | xmlstarlet select -t -m '/ListBucketResult//Contents' -v 'Key' -o ' ' -v 'LastModified' -n >"$listing"
   # Epoch timestamp of "now minus the retention period".
   oldTimestamp=$(date -d "-$backup_time day" +%s)
   while read -r filename modified _; do
      [ -n "$filename" ] || continue
      # An unparsable date used to yield an empty timestamp, which compares as
      # 0 and scheduled the object for deletion; skip such lines instead.
      timestamp=$(date -d "$modified" +%s 2>/dev/null) || continue
      [ -n "$timestamp" ] || continue
      if [[ $filename != "$error_file" && $timestamp -lt $oldTimestamp ]]; then
         ObjectNode="${ObjectNode}<Object><Key>${filename}</Key></Object>"
      fi
   done <"$listing"
   rm -f -- "$listing"

   if [ -n "$ObjectNode" ]; then
      echo "存在${backup_time}天以前的图片"
      compress_xml='<?xml version="1.0" encoding="UTF-8"?><Delete><Quiet>true</Quiet>'${ObjectNode}'</Delete>'
      printf '%s\n' "$compress_xml" | xmlstarlet format
      VERB="POST"
      # The MD5 must cover exactly the bytes that curl sends below.
      Content_MD5=$(printf '%s' "$compress_xml" | openssl dgst -md5 -binary | base64)
      Content_Type="application/xml"
      Date=$(TZ=GMT env LANG=en_US.UTF-8 date +'%a, %d %b %Y %H:%M:%S GMT')
      CanonicalizedOSSHeaders=""
      CanonicalizedResource="/$bucketname/?delete"
      printf -v stringToSign '%s\n%s\n%s\n%s\n%s%s' \
         "$VERB" "$Content_MD5" "$Content_Type" "$Date" \
         "$CanonicalizedOSSHeaders" "$CanonicalizedResource"
      # Log on one line, with the newlines shown as "\n".
      echo "sign:${stringToSign//$nl/\\n}"
      # NOTE(review): the secret is passed on openssl's command line and is
      # therefore visible in the process list while the command runs.
      Signature=$(printf '%s' "$stringToSign" | openssl sha1 -hmac "$AccessKeySecret" -binary | base64)
      Authorization="OSS $AccessKeyId:$Signature"
      # Status code goes on its own last line so a response body cannot get
      # mixed into it.
      response=$(curl -X "$VERB" -v -w '\n%{http_code}' \
         -H "HOST:$bucketname.$Host" \
         -H "Date:$Date" \
         -H "Content-Type:$Content_Type" \
         -H "Content-MD5:$Content_MD5" \
         -H "Authorization:$Authorization" \
         -d "$compress_xml" "https://$bucketname.$Host/?delete")
      http_code=${response##*$nl}
      if [ "$http_code" = "200" ]; then
         echo "${backup_time}天以前的图片删除成功"
      else
         echo "${backup_time}天以前的图片删除失败"
      fi
      # Rebuild the list of uploaded objects from what the bucket holds now.
      : >"$db_file"
      GetBucketV2 | xmlstarlet select -t -m '/ListBucketResult//Contents' -v 'Key' -n | while read -r line; do
         if [ -n "$line" ] && [ "$line" != "$error_file" ]; then
            printf '%s\n' "$line" >>"$db_file"
         fi
      done
   else
      echo "没有${backup_time}天以前的图片需要删除"
   fi
}

# Format a byte count for display.
# Arguments: $1 - size in bytes; anything that is not a plain non-negative
#                 integer (empty, "null", ...) is treated as 0
# Outputs:   "<n>KB" (integer division by 1024) for sizes up to 1 MiB,
#            otherwise "<n.nn>MB" with two decimals, truncated rather than
#            rounded
function fileSizeStr() {
   local bytes=$1
   local mib=1048576
   [[ $bytes =~ ^[0-9]+$ ]] || bytes=0
   # Force base 10 so a value with leading zeros is not read as octal.
   bytes=$((10#$bytes))
   if ((bytes > mib)); then
      # Pure integer arithmetic: same truncated two-decimal result as
      # `scale=2` in bc, without needing bc to be installed.
      printf '%d.%02dMB\n' "$((bytes / mib))" "$((bytes % mib * 100 / mib))"
   else
      printf '%dKB\n' "$((bytes / 1024))"
   fi
}

# Run a shell command that prints a JSON reply and retry it for as long as
# the reply carries error_code 429.
# Arguments: $1 - complete command line, executed with `bash -c`
# Outputs:   the command, its reply and status messages on stdout
# Returns:   1 when the reply has "ok": true, 0 on any other non-429 reply.
#            This is the reverse of the usual shell convention; the caller in
#            this script compares $? with 1, so it is kept as is.
function Request() {
   local result second
   while true; do
      echo "发送消息命令：$1"
      result=$(bash -c "$1")
      echo "请求响应：$result"
      if [ "$(printf '%s' "$result" | jq .ok)" = true ]; then
         return 1
      elif [ "$(printf '%s' "$result" | jq .error_code)" = 429 ]; then
         second=$(printf '%s' "$result" | jq .parameters.retry_after)
         # Without a usable retry_after, still pause briefly instead of
         # hammering the server.
         [[ $second =~ ^[0-9]+$ ]] || second=1
         echo "${second}后再发起请求"
         sleep "$second"
         # No recursive call here: the enclosing loop already re-runs the
         # command. The old unquoted `Request $1` passed only the first word
         # of the command line and ran a bogus command before each retry.
      else
         echo "执行命令遇到未知错误！"
         return 0
      fi
   done
}

# Telegram bot settings and the Pixiv ranking to fetch.
telegramToken=$(jq -r .telegramToken "$config_file")
baseApi="https://api.telegram.org/bot$telegramToken"
# Read without -r: a JSON string keeps its double quotes, and those are then
# parsed by the `bash -c` that Request runs the command lines through.
chat_id=$(jq .chatId "$config_file")
mode=$(jq -r .mode "$config_file")
content=$(jq -r .content "$config_file")
rank_url="https://www.pixiv.net/ranking.php?mode=$mode&content=$content&p=1&format=json"
today=$(date "+%Y-%m-%d")
_today=$(date "+%Y%m%d")
# The ranking response is cached per day under this name.
rank_json=$today.json

# Pipeline text that turns &amp; &lt; &gt; back into & < >; it is spliced into
# `bash -c` command strings further down.
rule='sed -e "s/\\&amp;/\\&/g"|sed -e "s/\\&lt;/\\</g"|sed -e "s/\\&gt;/\\>/g"'

# Announce the start of processing before any data is fetched.
Request "curl -v -d chat_id=$chat_id -d parse_mode=HTML  -d text=\"Pixiv排行榜已更新，开始处理<a href='https://www.pixiv.net/ranking.php?mode%3D$mode%26content%3D$content'>$today日榜</a>数据。#date$_today #日期$_today %0A%0A<strong>排名是什么？</strong>%0A排名是以pixiv上所有公开作品为对象的统计以及排名。%0A毎日0:00～23时59分59秒的阅览树・「赞！」数等为排名的依据，期结果由pixiv独自的算法「pixiv rank β」决定。统计结果于每日中午12:00公开。%0A<a href='https://www.pixiv.help/hc/zh-cn/categories/360001065093-%E6%9C%89%E5%85%B3%E6%8E%92%E8%A1%8C%E6%A6%9C'>有关排行榜</a>\" $baseApi/sendMessage"

if [ ! -f "$rank_json" ]; then
   echo "get data from $rank_url"
   curl -v "$rank_url" >"$rank_json"
fi
length=$(jq '.contents|length' "$rank_json")
# A failed or non-ranking response must not stay cached as today's data, and
# an empty count would make `seq 1 $length` iterate once over nothing.
if ! [[ $length =~ ^[0-9]+$ ]] || [ "$length" -eq 0 ]; then
   echo "no usable ranking data in $rank_json, removing it so the next run downloads it again"
   rm -f -- "$rank_json"
   length=0
fi

# State of the Telegram media group currently being assembled: curl -F
# arguments, the JSON media entries, and how many files are queued.
fileList="" media="" fileCount=0

# Limits: files per media group, largest original accepted as is (20 MiB, in
# bytes), and the largest long-side/short-side ratio accepted for a picture.
maxFileCount=10
maxFileSize=$((20 * 1024 * 1024))
maxRatio=10

# First and last rank covered by the current group of ten.
start_rank="" end_rank=""

# Running maxima over the processed artworks, each with the pid that set it.
maxLikeCount=0 maxLikeCountPid=0
maxBookmarkCount=0 maxBookmarkCountPid=0
maxViewCount=0 maxViewCountPid=0
# Walk the ranking entries in order. For each artwork: scrape its metadata
# from the artwork page, make sure every page's original image is in the OSS
# bucket, download the "$imageParam" rendition as a .webp file, and queue the
# first page for Telegram. Every $maxFileCount queued files are sent as one
# media group followed by a summary message.
for index in $(seq 1 $length); do
   index=$((index - 1)) # .contents[] is indexed from zero
   pid=$(jq --argjson index $index '.contents[$index].illust_id' $rank_json)
   artworkLink="https://www.pixiv.net/artworks/$pid"
   rank=$(jq --argjson index $index '.contents[$index].rank' $rank_json)
   yes_rank=$(jq --argjson index $index '.contents[$index].yes_rank' $rank_json)

   # Remember the first and last rank of each run of ten entries.
   if [ $(((index + 1) % 10)) == 1 ]; then
      start_rank=$rank
   fi
   if [ $(((index + 1) % 10)) == 0 ]; then
      end_rank=$rank
   fi
   # yes_rank 0 is reported as a first appearance.
   if [ $yes_rank -eq 0 ]; then
      rank_info="\#排名$rank \#rank$rank \#首次登场"
   else
      rank_info="\#排名$rank \#rank$rank 之前 \#排名$yes_rank \#rank$yes_rank"
   fi
   echo "pid=$pid,artworkLink=$artworkLink,rank_info=$rank_info"
   # The artwork page and the JSON extracted from it are cached per pid.
   png_html_file=$pid.html
   if [ ! -f $png_html_file ]; then
      echo "get data from $artworkLink"
      curl -v $artworkLink >$png_html_file
   fi
   json_file=$pid.json
   if [ ! -f $json_file ]; then
      # Cut the JSON out of the content='…' attribute and drop the two
      # trailing characters that follow it.
      grep -E -o "content='{\"timestamp.*" $png_html_file | sed -e "s/content='//" | sed -e "s/..$//" >$json_file
   fi
   pageCount=$(jq --arg pid $pid '.illust[$pid].pageCount' $json_file)
   original_url=$(jq -r --arg pid $pid '.illust[$pid].urls.original' $json_file)
   small_url=$(jq -r --arg pid $pid '.illust[$pid].urls.small' $json_file)
   title=$(jq -r --arg pid $pid '.illust[$pid].title' $json_file | sed -e 's/\"/\\\"/g' | sed -e 's/&lt;//g' | sed -e 's/&gt;//g')
   title=$(bash -c "echo '$title'|$rule")
   description=$(jq -r --arg pid $pid '.illust[$pid].description' $json_file)
   userName=$(jq -r --arg pid $pid '.illust[$pid].userName' $json_file | sed -e 's/\"/\\\"/g')
   userName=$(bash -c "echo '$userName'|$rule")
   userId=$(jq -r --arg pid $pid '.illust[$pid].userId' $json_file)
   likeCount=$(jq --arg pid $pid '.illust[$pid].likeCount' $json_file)
   if [ $likeCount -gt $maxLikeCount ]; then
      maxLikeCount=$likeCount
      maxLikeCountPid=$pid
   fi
   bookmarkCount=$(jq --arg pid $pid '.illust[$pid].bookmarkCount' $json_file)
   if [ $bookmarkCount -gt $maxBookmarkCount ]; then
      # Track the bookmark count itself (this used to store likeCount).
      maxBookmarkCount=$bookmarkCount
      maxBookmarkCountPid=$pid
   fi
   viewCount=$(jq --arg pid $pid '.illust[$pid].viewCount' $json_file)
   if [ $viewCount -gt $maxViewCount ]; then
      # Track the view count itself (this used to store likeCount).
      maxViewCount=$viewCount
      maxViewCountPid=$pid
   fi
   # One "\#tag" per tag, joined by spaces on a single line.
   tag=$(jq -r --arg pid $pid '.illust[$pid].tags.tags[].tag' $json_file | sed -e 's/^/\\#/g' | sed ':a;N;s/\n/ /;t a;' | sed 's/"/\\"/g')
   tag=$(bash -c "echo '$tag'|$rule")

   echo -e "pageCount=$pageCount,original_url=$original_url,small_url=$small_url\n\
        title=$title,description=$description,userName=$userName\n\
        likeCount=$likeCount,bookmarkCount=$bookmarkCount,viewCount=$viewCount\n\
        tag=$tag"

   for page in $(seq 1 $pageCount); do
      page=$((page - 1)) # page numbers in the URLs start at p0
      page_original_url=$(echo $original_url | sed -e "s/p0/p$page/")
      page_small_url=$(echo $small_url | sed -e"s/p0/p$page/")
      original_file_name=$(echo $page_original_url | grep -E -o "$pid.*")
      webp_file_name=$(echo $original_file_name | sed 's/jpg/webp/' | sed 's/png/webp/')

      # Upload only when $db_file does not already list this object.
      if [ "$(cat $db_file | grep $original_file_name)" != "$original_file_name" ]; then
         echo "download image file name=$original_file_name,url=$page_original_url"
         if [ ! -f $original_file_name ]; then
            curl -v -H 'referer: https://www.pixiv.net/' $page_original_url -o $original_file_name
         fi
         original_file_size=$(du -b $original_file_name | awk '{print $1}')
         if [ $original_file_size -gt $maxFileSize ]; then
            # Oversized original: convert it to webp through convertio
            # (start conversion, upload the file, poll until finished).
            echo "${original_file_name}文件体积：${original_file_size}字节超过${maxFileSize}字节限制，需要在线压缩"
            result=$(curl -v -X POST -d "{\"apikey\": \"$convertio_apikey\", \"input\":\"upload\", \"outputformat\":\"webp\"}" http://api.convertio.co/convert)

            if [ $(echo $result | jq -r .status) = ok ]; then
               id=$(echo $result | jq -r .data.id)
               result=$(curl -v -X PUT --upload-file $original_file_name http://api.convertio.co/convert/$id/$original_file_name)
               if [ $(echo $result | jq -r .status) = ok ]; then
                  while true; do
                     wait_second=10
                     sleep $wait_second
                     result=$(curl -v -X GET http://api.convertio.co/convert/$id/status)
                     echo "转换响应：$result"
                     if [[ $(echo $result | jq -r .status) = ok && $(echo $result | jq -r .data.step) = finish ]]; then
                        compress_size=$(echo $result | jq -r .data.output.size)
                        echo "${original_file_name}文件体积：${original_file_size}字节，压缩后：${compress_size}字节"
                        if [ $compress_size -gt $maxFileSize ]; then
                           # Still too large: use the placeholder picture with
                           # an explanatory watermark as the .webp instead.
                           echo "压缩无效，返回异常图片"
                           FileSize=$(fileSizeStr $compress_size)
                           url="https://$bucketname.$accelerateHost/$error_file?x-oss-process=image/watermark,text_$(echo "画作pid：$pid" | base64),g_north/watermark,text_$(echo 因为图片文件体积：${FileSize}不合法 | base64 | sed 's/+/-/g; s,/,_,g'),color_FF0000,y_50,g_north/watermark,text_5omA5Lul57yp55Wl5Zu-5peg5rOV5LiK5Lyg5YiwdGVsZWdyYW0=,color_FF0000,y_100,g_north/watermark,text_5bi46KeB5LqO6LaF6auY5YOP57Sg5Zu-54mH,y_150,g_north/watermark,text_5rOo5oSP77ya4oCc6aKE6KeI5Y6f5Zu-4oCd44CB4oCc5Yqg6YCf5LiL6L295Y6f5Zu-4oCd,color_008800,y_200,g_north/watermark,text_5Lul5LiK5Yqf6IO954Wn5bi45L2_55So,color_008800,y_250,g_north"
                           echo "文字水印请求url：$url"
                           curl -v $url -o $webp_file_name
                        else
                           # Download the converted file; from here on it is
                           # the file that gets uploaded.
                           echo $result | jq .data.output.url | xargs curl -v -o $webp_file_name
                           original_file_name=$webp_file_name
                        fi
                        break
                     elif [ $(echo $result | jq -r .status) = error ]; then
                        break
                     else
                        echo "${wait_second}s后重新获取转换结果"
                     fi
                  done
               else
                  echo "Direct File Upload For Conversion error!!!"
               fi
            else
               echo "Start a New Conversion error!!!"
            fi
         fi
         PutObject $original_file_name

      fi
      if [ ! -f $webp_file_name ]; then
         # Fetch the rendition produced by appending $imageParam to the
         # object URL.
         webp_url="https://$bucketname.$accelerateHost/$original_file_name$imageParam"
         echo "download image file name=$webp_file_name,url=$webp_url"
         curl -v $webp_url -o $webp_file_name
         # Take the "WxH" field from file(1)'s description and compute
         # long side / short side.
         image_size=$(file $webp_file_name | awk '{print $9}' | sed -e 's/,//')
         image_ratio=$(echo $image_size | sed '/^$/d' | awk '{split($0,a,"x");f=a[1]>a[2]?a[1]/a[2]:a[2]/a[1];print f}')
         if [[ -n $image_ratio && $(echo "$image_ratio > $maxRatio" | bc) -eq 1 ]]; then
            # Extreme aspect ratio: replace the picture with the watermarked
            # placeholder.
            echo "图片分辨率：${image_size},分辨率比例：${image_ratio}>${maxRatio},视为异常，返回异常图片"
            url="https://$bucketname.$accelerateHost/$error_file?x-oss-process=image/watermark,text_$(echo "画作pid：$pid" | base64),g_north/watermark,text_$(echo 因为图片分辨率：${image_size}不合法 | base64 | sed 's/+/-/g; s,/,_,g'),color_FF0000,y_50,g_north/watermark,text_5omA5Lul57yp55Wl5Zu-5peg5rOV5LiK5Lyg5YiwdGVsZWdyYW0=,color_FF0000,y_100,g_north/watermark,text_5bi46KeB5LqO6LaF6ZW_5p2h54q25ryr55S7,y_150,g_north/watermark,text_5rOo5oSP77ya4oCc6aKE6KeI5Y6f5Zu-4oCd44CB4oCc5Yqg6YCf5LiL6L295Y6f5Zu-4oCd,color_008800,y_200,g_north/watermark,text_5Lul5LiK5Yqf6IO954Wn5bi45L2_55So,color_008800,y_250,g_north"
            echo "文字水印请求url：$url"
            curl -v $url -o $webp_file_name
         fi
      fi

      # Only the first page of an artwork is queued for Telegram.
      if [ $page -eq 0 ]; then
         info_file=$original_file_name.info
         curl -v -o $info_file https://$bucketname.$Host/$original_file_name?x-oss-process=image/info
         FileSize=$(fileSizeStr $(cat $info_file | jq -r .FileSize.value))
         ImageHeight=$(cat $info_file | jq -r .ImageHeight.value)
         ImageWidth=$(cat $info_file | jq -r .ImageWidth.value)
         media="$media,{\"type\":\"photo\",\"media\":\"attach://$webp_file_name\",\"parse_mode\":\"HTML\",\"caption\":\"$rank_info\n<a href=\\\"$artworkLink\\\">$title</a>\n<a href=\\\"https://www.pixiv.net/users/$userId\\\">$userName</a>\n$tag\n原图分辨率：${ImageWidth}X${ImageHeight}(宽X高)，文件体积：$FileSize，<a href=\\\"https://$bucketname.$Host/$original_file_name\\\">预览原图</a>  <a href=\\\"https://$bucketname.$accelerateHost/$original_file_name\\\">加速下载原图</a>\"}"
         fileList="$fileList -F $webp_file_name=@$webp_file_name"
         fileCount=$((fileCount + 1))
         fileSize=$(du $webp_file_name | awk '{print $1}')
         echo "fileCount=$fileCount"
      fi
      if [[ $fileCount -eq $maxFileCount ]]; then
         # $media starts with a comma; `cut -c 2-` drops it.
         Request "curl -v -F  chat_id=$chat_id $fileList -F media='[$(echo $media | cut -c 2-)]' $baseApi/sendMediaGroup"
         # Request returns 1 on success.
         if [ $? -eq 1 ]; then
            # sleepImage is not set anywhere in this script; default to no
            # pause instead of calling sleep without an operand.
            sleep "${sleepImage:-0}"
            text="以上作品日榜排名分别是 #排名${start_rank}_${end_rank} #rank${start_rank}_${end_rank} ，点击作品可以查看pid/标题/画师/tag信息."
         else
            text="#排名${start_rank}_${end_rank} #rank${start_rank}_${end_rank} 作品上传失败，请联系管理员"
         fi

         Request "curl -v -d chat_id=$chat_id -d text=\"$text\" $baseApi/sendMessage"

         # Start a new, empty group.
         fileList=''
         media=''
         fileCount=0
      fi
   done
done

# Ask Cronitor when the job is expected to run next and include that time in
# the closing Telegram message.
next_expected_at=$(curl -v "https://cronitor.io/api/monitors/$CronitorJobName" -u "$CronitorKey:" | jq .next_expected_at)
Request "curl -v -d chat_id=$chat_id -d text=\"以上就是$today日榜前${length}名作品，本次推送完毕，下次推送时间预计是$(date -d @$next_expected_at '+%Y-%m-%d %H:%M:%S')，如有问题请联系管理员。 #date$_today  #日期$_today \" $baseApi/sendMessage"

# Remove cached files older than $backup_time days from the working directory.
# The name tests are grouped so that -type f, -mtime and the config-file
# exclusion apply to every extension; without the parentheses -type f covered
# only *.jpg and the exclusion only *.json. -exec … {} + replaces `| xargs rm`
# so unusual file names are passed through intact.
find . -type f \
   \( -name '*.jpg' -o -name '*.png' -o -name '*.webp' \
   -o -name '*.html' -o -name '*.info' -o -name '*.json' \) \
   -mtime +"$backup_time" \
   ! -name "$(basename -- "$config_file")" \
   -exec rm -f -- {} +
# Then prune old objects from the bucket as well.
DeleteMultipleObjects