#!/bin/bash
#
# Pushes the Pixiv daily ranking to a Telegram chat.
#
# For every ranked artwork the script downloads the original image, uploads
# it to an Aliyun OSS bucket, fetches a resized preview back from the bucket,
# and sends the previews to Telegram in albums of ${maxFileCount} items.
#
# Usage:    <script> <config.json>
# Requires: jq, curl, openssl, base64, file, bc, awk, sed (GNU userland).

config_file=$1

# Prints a message on stderr and aborts the script with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [[ -z "$config_file" ]]; then
  die 'The config file must be specified'
fi
if [[ ! -f "$config_file" ]]; then
  die "config file:$config_file not found"
fi
if ! command -v jq >/dev/null 2>&1; then
  die 'To run this script, you need to install jq,https://stedolan.github.io/jq/'
fi
echo "jq version:$(jq --version)"

# Reads one value from the config file as a raw (unquoted) string.
cfg() {
  jq -r "$1" "$config_file"
}

# ==================== Config ====================
# Every value is read here, BEFORE the `cd` into basePath further down, so
# that a relative config path keeps working.
Host=$(cfg .aliyun_oss.Host)
accelerateHost=$(cfg .aliyun_oss.accelerateHost)
bucketname=$(cfg .aliyun_oss.bucketname)
AccessKeyId=$(cfg .aliyun_oss.AccessKeyId)
AccessKeySecret=$(cfg .aliyun_oss.AccessKeySecret)
db_file=$(cfg .aliyun_oss.db)
db_file_err=$db_file.err
imageParam=$(cfg .aliyun_oss.imageParam)
deleteApi=$(cfg .aliyun_oss.deleteApi)
convertio_apikey=$(cfg .convertio.apikey)
CronitorKey=$(cfg .Cronitor.API_KEY)
CronitorJobName=$(cfg .Cronitor.JOB_NAME)
basePath=$(cfg .basePath)
telegramToken=$(cfg .telegramToken)
mode=$(cfg .mode)
content=$(cfg .content)
# The values below are read WITHOUT -r: a JSON string keeps its quotes.
chat_id=$(jq .chatId "$config_file")
anonfiles_token=$(jq .anonfilesToken "$config_file") # not used in this script
sleepText=$(jq .sleep.text "$config_file")
sleepImage=$(jq .sleep.image "$config_file")
# `rule` becomes a shell pipeline with one stage per entry of .rule:
#   sed -e "s/\KEY/\VALUE/g"|sed -e "s/\KEY/\VALUE/g"|...
rule=$(jq -r '.rule | to_entries[] | "sed -e \"s/\\" + .key + "/\\" + .value + "/g\""' "$config_file" \
  | sed ':a;N;s/\n/|/;t a;')
# ================================================

# NOTE: a relative db path is resolved against the current directory here,
# but against basePath for every later access (after the `cd`).
if [[ ! -f "$db_file" ]]; then
  touch "$db_file"
fi

#######################################
# Uploads a file to the OSS bucket with a public-read ACL, using the
# header-based HMAC-SHA1 signature.
# Globals:   bucketname, Host, AccessKeyId, AccessKeySecret (read),
#            db_file, db_file_err (appended to)
# Arguments: $1 - file to upload; the same string is used as the object key
# Outputs:   on HTTP 200 the name is appended to db_file and the local file
#            is removed; otherwise the name is appended to db_file_err
#######################################
function PutObject() {
  local file=$1
  local verb="PUT"
  local content_md5=""
  local content_type date_header resource string_to_sign signature http_code

  content_type=$(file -b --mime-type "$file")
  # The signed date needs English day/month names regardless of the
  # caller's locale, hence LC_ALL=C.
  date_header=$(TZ=GMT LC_ALL=C date +'%a, %d %b %Y %H:%M:%S GMT')
  resource="/$bucketname/$file"
  string_to_sign=$(printf '%s\n%s\n%s\n%s\n%s\n%s' \
    "$verb" "$content_md5" "$content_type" "$date_header" \
    "x-oss-object-acl:public-read" "$resource")
  signature=$(printf '%s' "$string_to_sign" \
    | openssl sha1 -hmac "$AccessKeySecret" -binary | base64)

  # -o /dev/null keeps the response body out of the captured value, so
  # http_code holds nothing but the status code.
  http_code=$(curl -v -o /dev/null -w "%{http_code}" -X "$verb" \
    -H "HOST:$bucketname.$Host" \
    -H "x-oss-object-acl:public-read" \
    -H "Date:$date_header" \
    -H "Content-Type:$content_type" \
    -H "Authorization:OSS $AccessKeyId:$signature" \
    --data-binary "@$file" \
    "https://$bucketname.$Host/$file")

  if [[ "$http_code" == "200" ]]; then
    echo "$file" >>"$db_file"
    rm -f -- "$file"
  else
    echo "$file" >>"$db_file_err"
  fi
}

#######################################
# Calls the external cleanup API in batches of 50 until it reports that no
# files remain, then overwrites db_file with the response of the list API.
# Globals:   AccessKeyId, AccessKeySecret, Host, bucketname, deleteApi,
#            db_file (read)
# Returns:   1 when the cleanup API answers without a numeric .count; in
#            that case db_file is left untouched
#######################################
function DeleteMultipleObjects() {
  local temp_file count
  temp_file=$(mktemp) || return 1

  while true; do
    # Start from an empty file so a failed request cannot be mistaken for
    # the previous response.
    : >"$temp_file"
    curl -v -o "$temp_file" \
      --data-urlencode "AccessKeyId=$AccessKeyId" \
      --data-urlencode "AccessKeySecret=$AccessKeySecret" \
      --data-urlencode "host=$Host" \
      --data-urlencode "bucketname=$bucketname" \
      -d "max=50" \
      "$deleteApi/aliyun_oss"
    cat "$temp_file"
    count=$(jq -r .count "$temp_file")
    echo "剩余需要清理文件个数:$count"
    if [[ ! "$count" =~ ^[0-9]+$ ]]; then
      # Without this guard a broken response would loop forever.
      echo "unexpected response from $deleteApi/aliyun_oss, cleanup aborted" >&2
      rm -f -- "$temp_file"
      return 1
    fi
    if ((count == 0)); then
      echo "delete finish"
      break
    fi
  done
  rm -f -- "$temp_file"

  curl -v -o "$db_file" \
    --data-urlencode "AccessKeyId=$AccessKeyId" \
    --data-urlencode "AccessKeySecret=$AccessKeySecret" \
    --data-urlencode "host=$Host" \
    --data-urlencode "bucketname=$bucketname" \
    "$deleteApi/list_file"
}

#######################################
# Formats a byte count for display.
# Arguments: $1 - size in bytes
# Outputs:   "<n.nn>MB" (via bc, scale=2) above 1048576 bytes,
#            "<n>KB" (integer division) otherwise
#######################################
function fileSizeStr() {
  local fileSize=$1
  if [[ "$fileSize" -gt 1048576 ]]; then
    echo "$(echo "scale=2;$fileSize/1048576" | bc)MB"
  else
    echo "$((fileSize / 1024))KB"
  fi
}

#######################################
# Runs a text through the configured `rule` sed pipeline.
# The text is fed on stdin rather than spliced into the command string, so
# quotes inside Pixiv-supplied titles/tags cannot break or inject into the
# shell. `rule` itself comes from the (trusted) config file.
# Arguments: $1 - text to rewrite
# Outputs:   rewritten text; the text unchanged when no rule is configured
#######################################
apply_rule() {
  if [[ -z "$rule" ]]; then
    printf '%s\n' "$1"
    return
  fi
  printf '%s\n' "$1" | bash -c "$rule"
}

#######################################
# Re-encodes a file in place as JPEG through the convertio.co API.
# NOTE(review): the status poll has no retry limit; it only stops on
# "finish" or "error".
# Globals:   convertio_apikey (read)
# Arguments: $1 - file to convert (overwritten with the converted output)
# Returns:   0 once the converted file has been downloaded, 1 on API error
#######################################
convertio_compress() {
  local target=$1
  local result id status step

  result=$(curl -v -X POST -d "{\"apikey\": \"$convertio_apikey\", \"input\":\"upload\", \"outputformat\":\"jpeg\"}" http://api.convertio.co/convert)
  if [[ "$(jq -r .status <<<"$result")" != ok ]]; then
    echo "Start a New Conversion error!!!"
    return 1
  fi

  id=$(jq -r .data.id <<<"$result")
  result=$(curl -v -X PUT --upload-file "$target" "http://api.convertio.co/convert/$id/$target")
  if [[ "$(jq -r .status <<<"$result")" != ok ]]; then
    echo "Direct File Upload For Conversion error!!!"
    return 1
  fi

  while true; do
    sleep 10
    result=$(curl -v -X GET "http://api.convertio.co/convert/$id/status")
    status=$(jq -r .status <<<"$result")
    step=$(jq -r .data.step <<<"$result")
    if [[ "$status" == ok && "$step" == finish ]]; then
      curl -v -o "$target" "$(jq -r .data.output.url <<<"$result")"
      return 0
    elif [[ "$status" == error ]]; then
      return 1
    else
      echo "10s后重新获取转换结果"
    fi
  done
}

# Succeeds when $1 is numerically greater than $2 (floating point aware).
number_gt() {
  awk -v lhs="$1" -v rhs="$2" 'BEGIN { exit !(lhs + 0 > rhs + 0) }'
}

if [[ ! -d "$basePath" ]]; then
  mkdir -p "$basePath"
fi
cd "$basePath" || die "cannot cd to $basePath"

baseApi="https://api.telegram.org/bot$telegramToken"
rank_url="https://www.pixiv.net/ranking.php?mode=$mode&content=$content&p=1&format=json"
today=$(date "+%Y-%m-%d")
_today=$(date "+%Y%m%d")
rank_json=$today.json

curl -v -d "chat_id=$chat_id" -d parse_mode=HTML -d text="Pixiv排行榜已更新,${sleepText}秒后开始处理${today}日榜数据。#date${_today} #日期${_today} %0A%0A排名是什么?%0A排名是以pixiv上所有公开作品为对象的统计以及排名。%0A毎日0:00~23时59分59秒的阅览树・「赞!」数等为排名的依据,期结果由pixiv独自的算法「pixiv rank β」决定。统计结果于每日中午12:00公开。%0A有关排行榜" "$baseApi/sendMessage"
sleep "$sleepText"

# The ranking JSON is cached per day; an existing file is reused as is.
if [[ ! -f "$rank_json" ]]; then
  echo "get data from $rank_url"
  curl -v "$rank_url" >"$rank_json"
fi

length=$(jq '.contents|length' "$rank_json")
length=${length:-0}
fileCountSize=0
fileArgs=() # curl -F arguments attaching each preview of the current album
media=''    # comma-prefixed JSON objects describing the current album
fileCount=0
maxFileCount=10
tarFile=$today.tar.gz # not used in this script
maxFileSize=20971520
maxFileSize_M="$((maxFileSize / 1024 / 1024))M" # not used in this script
start_rank=''
end_rank=''
maxLikeCount=0
maxLikeCountPid=0
maxBookmarkCount=0
maxBookmarkCountPid=0
maxViewCount=0
maxViewCountPid=0
maxRatio=10

for ((index = 0; index < length; index++)); do
  read -r pid rank yes_rank < <(
    jq -r --argjson index "$index" \
      '.contents[$index] | "\(.illust_id) \(.rank) \(.yes_rank)"' "$rank_json"
  )
  artworkLink="https://www.pixiv.net/artworks/$pid"

  # Remember the first and last rank of each album for the summary message.
  if (((index + 1) % maxFileCount == 1)); then
    start_rank=$rank
  fi
  if (((index + 1) % maxFileCount == 0)); then
    end_rank=$rank
  fi

  if [[ "$yes_rank" -eq 0 ]]; then
    rank_info="\#排名$rank \#rank$rank \#首次登场"
  else
    rank_info="\#排名$rank \#rank$rank 之前 \#排名$yes_rank \#rank$yes_rank"
  fi
  echo "pid=$pid,artworkLink=$artworkLink,rank_info=$rank_info"

  # The artwork page and the JSON extracted from it are cached per pid.
  png_html_file=$pid.html
  if [[ ! -f "$png_html_file" ]]; then
    echo "get data from $artworkLink"
    curl -v "$artworkLink" >"$png_html_file"
  fi
  json_file=$pid.json
  if [[ ! -f "$json_file" ]]; then
    # The preload data is embedded in the page as content='{"timestamp...}';
    # cut the attribute prefix and the two trailing characters.
    grep -E -o "content='{\"timestamp.*" "$png_html_file" \
      | sed -e "s/content='//" -e "s/..$//" >"$json_file"
  fi

  pageCount=$(jq --arg pid "$pid" '.illust[$pid].pageCount' "$json_file")
  original_url=$(jq -r --arg pid "$pid" '.illust[$pid].urls.original' "$json_file")
  small_url=$(jq -r --arg pid "$pid" '.illust[$pid].urls.small' "$json_file")
  # Escape double quotes (the text ends up inside a JSON string) and drop
  # angle brackets (the caption is sent with parse_mode HTML).
  title=$(jq -r --arg pid "$pid" '.illust[$pid].title' "$json_file" \
    | sed -e 's/"/\\"/g' -e 's/<//g' -e 's/>//g')
  title=$(apply_rule "$title")
  description=$(jq -r --arg pid "$pid" '.illust[$pid].description' "$json_file")
  userName=$(jq -r --arg pid "$pid" '.illust[$pid].userName' "$json_file" \
    | sed -e 's/"/\\"/g')
  userName=$(apply_rule "$userName")
  userId=$(jq -r --arg pid "$pid" '.illust[$pid].userId' "$json_file")

  likeCount=$(jq --arg pid "$pid" '.illust[$pid].likeCount' "$json_file")
  if [[ "$likeCount" -gt "$maxLikeCount" ]]; then
    maxLikeCount=$likeCount
    maxLikeCountPid=$pid
  fi
  bookmarkCount=$(jq --arg pid "$pid" '.illust[$pid].bookmarkCount' "$json_file")
  if [[ "$bookmarkCount" -gt "$maxBookmarkCount" ]]; then
    maxBookmarkCount=$bookmarkCount
    maxBookmarkCountPid=$pid
  fi
  viewCount=$(jq --arg pid "$pid" '.illust[$pid].viewCount' "$json_file")
  if [[ "$viewCount" -gt "$maxViewCount" ]]; then
    maxViewCount=$viewCount
    maxViewCountPid=$pid
  fi

  # One tag per line, each prefixed by the first sed, then joined by spaces.
  tag=$(jq -r --arg pid "$pid" '.illust[$pid].tags.tags[].tag' "$json_file" \
    | sed -e 's/^/\#/g' | sed ':a;N;s/\n/ /;t a;')
  tag=$(apply_rule "$tag")

  printf '%s\n' \
    "pageCount=$pageCount,original_url=$original_url,small_url=$small_url" \
    "title=$title,description=$description,userName=$userName" \
    "likeCount=$likeCount,bookmarkCount=$bookmarkCount,viewCount=$viewCount" \
    "tag=$tag"

  for ((page = 0; page < pageCount; page++)); do
    # Page URLs differ from the first page's URL only in the _p<N> part.
    page_original_url=${original_url/_p0/_p$page}
    page_small_url=${small_url/_p0/_p$page}
    original_file_name=${page_original_url##*/}
    webp_file_name=$(sed -e 's/jpg/webp/' -e 's/png/webp/' <<<"$original_file_name")

    # db_file holds one uploaded object name per line; skip known files.
    if ! grep -Fxq -- "$original_file_name" "$db_file"; then
      echo "download image file name=$original_file_name,url=$page_original_url"
      if [[ ! -f "$original_file_name" ]]; then
        curl -v -H 'referer: https://www.pixiv.net/' "$page_original_url" -o "$original_file_name"
      fi
      original_file_size=$(wc -c <"$original_file_name")
      if [[ "$original_file_size" -gt "$maxFileSize" ]]; then
        echo "${original_file_name}文件体积超过${maxFileSize}字节,需要在线压缩"
        if convertio_compress "$original_file_name"; then
          PutObject "$original_file_name"
        fi
      else
        PutObject "$original_file_name"
      fi
    fi

    if [[ ! -f "$webp_file_name" ]]; then
      webp_url="https://$bucketname.$accelerateHost/$original_file_name$imageParam"
      echo "download image file name=$webp_file_name,url=$webp_url"
      curl -v "$webp_url" -o "$webp_file_name"
      # Field 9 of file(1)'s description is expected to be "<W>x<H>,".
      resolution=$(file "$webp_file_name" | awk '{print $9}' | sed -e 's/,//')
      image_ratio=$(printf '%s\n' "$resolution" | sed '/^$/d' \
        | awk '{split($0,a,"x");f=a[1]>a[2]?a[1]/a[2]:a[2]/a[1];print f}')
      # Numeric comparison: a string comparison would rank "2" above "10".
      if [[ -n "$image_ratio" ]] && number_gt "$image_ratio" "$maxRatio"; then
        echo "图片分辨率${resolution}比率:$image_ratio>${maxRatio},视为异常,返回异常图片"
        watermark=$(echo "画作pid:$pid" | base64)
        url="https://$bucketname.$accelerateHost/error.jpeg?x-oss-process=image/watermark,text_${watermark},g_north/watermark,text_5YiG6L6o546H5q%2BU5L6L5byC5bi4Cg==,g_center/watermark,text_5bi46KeB5LqO6LaF6ZW_5Zu-,g_south"
        echo "文字水印请求url:$url"
        curl -v "$url" -o "$webp_file_name"
      fi
    fi

    # Only the first page of an artwork is added to the album.
    if [[ "$page" -eq 0 ]]; then
      info_file=$original_file_name.info
      curl -v --output "$info_file" "https://$bucketname.$Host/$original_file_name?x-oss-process=image/info"
      FileSize=$(fileSizeStr "$(jq -r .FileSize.value "$info_file")")
      ImageHeight=$(jq -r .ImageHeight.value "$info_file")
      ImageWidth=$(jq -r .ImageWidth.value "$info_file")
      media="$media,{\"type\":\"photo\",\"media\":\"attach://$webp_file_name\",\"parse_mode\":\"HTML\",\"caption\":\"$rank_info\n$title\n$userName\n$tag\n原图分辨率:${ImageWidth}X${ImageHeight}(宽X高),文件体积:${FileSize},预览原图 加速下载原图\"}"
      fileArgs+=(-F "$webp_file_name=@$webp_file_name")
      fileCount=$((fileCount + 1))
      fileSize=$(du "$webp_file_name" | awk '{print $1}')
      fileCountSize=$((fileCountSize + fileSize))
      echo "fileCountSize=$fileCountSize,fileCount=$fileCount"
    fi

    # NOTE(review): an album is sent only when it is full, so a trailing
    # partial album (ranking length not a multiple of maxFileCount) is
    # never sent.
    if [[ "$fileCount" -eq "$maxFileCount" ]]; then
      # Argument array instead of a string run through `bash -c`: captions
      # reach curl verbatim, whatever quotes they contain.
      send_cmd=(curl -v -F "chat_id=$chat_id" "${fileArgs[@]}"
        -F "media=[${media#,}]" "$baseApi/sendMediaGroup")
      echo "上传图片命令:${send_cmd[*]}"
      "${send_cmd[@]}"
      sleep "$sleepImage"
      curl -v -d "chat_id=$chat_id" -d text="以上作品日榜排名分别是 #排名${start_rank}_${end_rank} #rank${start_rank}_${end_rank} ,点击作品可以查看pid/标题/画师/tag信息." "$baseApi/sendMessage"
      sleep "$sleepText"
      fileCountSize=0
      fileArgs=()
      media=''
      fileCount=0
    fi
  done
done

next_expected_at=$(curl -v "https://cronitor.io/api/monitors/$CronitorJobName" -u "$CronitorKey:" | jq .next_expected_at)
curl -v -d "chat_id=$chat_id" -d text="以上就是${today}日榜前${length}名作品,本次推送完毕,下次推送时间预计是$(date -d "@$next_expected_at" '+%Y-%m-%d %H:%M:%S'),如有问题请联系管理员。 #date${_today} #日期${_today} " "$baseApi/sendMessage"

# Lists (does not delete) cached files older than 7 days.
# NOTE(review): presumably meant as a cleanup step - confirm before adding
# a delete action.
find . -type f -mtime +7 | grep html
find . -type f -mtime +7 | grep json
find . -type f -mtime +7 | grep webp

DeleteMultipleObjects