You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
shelltool/pixiv/pixiv.sh

270 lines
13 KiB

#!/bin/bash
# Usage: pixiv.sh <config.json>
#
# Start-up section: validates the command line, makes sure jq is available,
# loads the OSS / compression host / Cronitor settings from the JSON config
# file and creates the upload database file when it does not exist yet.
config_file=$1
if [ -z "$config_file" ]
then
  echo 'The config file must be specified' >&2
  exit 1
fi
if [ ! -f "$config_file" ]
then
  # Double quotes so the offending path is actually shown.
  echo "config file:$config_file not found" >&2
  exit 1
fi
# `command -v` fails cleanly when jq is absent; the former
# `[ ! -f $(which jq) ]` collapsed to `[ ! -f ]`, which is always false.
if ! command -v jq >/dev/null 2>&1
then
  echo 'To run this script, you need to install jq,https://stedolan.github.io/jq/' >&2
  exit 1
else
  echo "jq version:$(jq --version)"
fi
# ==================== Config ====================
# Aliyun OSS endpoint, bucket and credentials.
Host=$(jq -r .aliyun_oss.Host "$config_file")
accelerateHost=$(jq -r .aliyun_oss.accelerateHost "$config_file")
bucketname=$(jq -r .aliyun_oss.bucketname "$config_file")
AccessKeyId=$(jq -r .aliyun_oss.AccessKeyId "$config_file")
AccessKeySecret=$(jq -r .aliyun_oss.AccessKeySecret "$config_file")
# db_file lists the objects already uploaded; db_file_err lists failed uploads.
db_file=$(jq -r .aliyun_oss.db "$config_file")
db_file_err=$db_file.err
# Suffix appended to the OSS object URL when the preview image is fetched.
imageParam=$(jq -r .aliyun_oss.imageParam "$config_file")
# Remote host used (over scp/ssh) to shrink oversized images.
CompressHost=$(jq -r .compress.Host "$config_file")
CompressPort=$(jq -r .compress.Port "$config_file")
CompressUser=$(jq -r .compress.User "$config_file")
CompressPath=$(jq -r .compress.Path "$config_file")
# Cronitor monitor queried for the next expected run time.
CronitorKey=$(jq -r .Cronitor.API_KEY "$config_file")
CronitorJobName=$(jq -r .Cronitor.JOB_NAME "$config_file")
# ================================================
if [ ! -f "$db_file" ]
then
  touch "$db_file"
fi
#######################################
# Upload one local file to the Aliyun OSS bucket with a signed PUT request
# (public-read ACL). The local path is also used as the object key.
# Globals:
#   bucketname, Host, AccessKeyId, AccessKeySecret (read)
#   db_file      (the file name is appended after a 200 response)
#   db_file_err  (the file name is appended after any other outcome)
# Arguments:
#   $1 - path of the file to upload, relative to the current directory
# Side effects:
#   Deletes the local file after a successful upload.
# Returns:
#   0 when OSS answered 200, 1 otherwise.
#######################################
function upload(){
  local file=$1
  local verb="PUT"
  local content_md5=""
  local acl_header="x-oss-object-acl:public-read"
  local content_type date_header resource signature authorization http_code

  if [[ ! -f "$file" ]]
  then
    echo "upload: file not found: $file" >&2
    echo "$file" >>"$db_file_err"
    return 1
  fi

  content_type=$(file -b --mime-type -- "$file")
  # LC_ALL=C pins English day/month names even when the caller exports LC_ALL;
  # a localized date would not match the signature expected by the server.
  date_header=$(LC_ALL=C TZ=GMT date +'%a, %d %b %Y %H:%M:%S GMT')
  resource="/$bucketname/$file"

  # String to sign: verb, Content-MD5, Content-Type, Date, canonicalized
  # x-oss-* headers, canonicalized resource - newline separated, with no
  # trailing newline. printf '%s' keeps backslashes or '%' in values literal.
  signature=$(printf '%s\n%s\n%s\n%s\n%s\n%s' \
    "$verb" "$content_md5" "$content_type" "$date_header" "$acl_header" "$resource" \
    | openssl sha1 -hmac "$AccessKeySecret" -binary | base64)
  authorization="OSS $AccessKeyId:$signature"

  # -o /dev/null drops the response body so only the status code is captured.
  http_code=$(curl -v -o /dev/null -w "%{http_code}" -X PUT \
    -H "HOST:$bucketname.$Host" \
    -H "$acl_header" \
    -H "Date:$date_header" \
    -H "Content-Type:$content_type" \
    -H "Authorization:$authorization" \
    --data-binary "@$file" "https://$bucketname.$Host/$file")

  if [[ "$http_code" == "200" ]]
  then
    echo "$file" >>"$db_file"
    rm -f -- "$file"
    return 0
  fi
  echo "$file" >>"$db_file_err"
  return 1
}
#######################################
# Format a byte count as a short human-readable size.
# Arguments:
#   $1 - size in bytes (integer)
# Outputs:
#   "<x.xx>MB" (two decimals, truncated) when the size exceeds 1048576 bytes,
#   otherwise "<n>KB" with the kilobyte value truncated to a whole number.
#######################################
function fileSizeStr(){
  local bytes=$1
  local -r mib=1048576
  if [[ $bytes -gt $mib ]]
  then
    # Integer arithmetic yields the same truncated two decimals that
    # `bc` with scale=2 produced, without needing bc to be installed.
    printf '%d.%02dMB\n' "$((bytes / mib))" "$((bytes % mib * 100 / mib))"
  else
    echo "$((bytes/1024))KB"
  fi
}
# ---- Working directory ----
# Every download and cache file below uses a path relative to $basePath.
# NOTE(review): $config_file is read again after the cd, so a relative config
# path would be resolved against $basePath - confirm callers pass an absolute path.
basePath=$(jq -r .basePath "$config_file")
if [ ! -d "$basePath" ]
then
  mkdir -p "$basePath"
fi
# Stop here rather than download (and later delete) files in the wrong place.
cd "$basePath" || { echo "cannot enter working directory: $basePath" >&2; exit 1; }

# ---- Telegram / Pixiv settings ----
telegramToken=$(jq -r .telegramToken "$config_file")
baseApi="https://api.telegram.org/bot$telegramToken"
chat_id=$(jq .chatId "$config_file")
mode=$(jq -r .mode "$config_file")
content=$(jq -r .content "$config_file")
rank_url="https://www.pixiv.net/ranking.php?mode=$mode&content=$content&p=1&format=json"
today=$(date "+%Y-%m-%d")
_today=$(date "+%Y%m%d")
rank_json=$today.json
anonfiles_token=$(jq .anonfilesToken "$config_file")
# $rule becomes a shell pipeline of `sed -e "s/\KEY/\VALUE/g"` stages, one per
# entry of the config's .rule object, joined with '|'.
# Deliberately left in backticks: inside backquotes "\\\\" is reduced to "\\"
# before jq sees the filter, so moving this line to $(...) unchanged would
# double every backslash in the generated sed commands.
rule=`jq .rule "$config_file"|jq to_entries|jq 'map("sed -e \"s/\\\\"+.key+"/\\\\"+.value+"/g\"")'|jq -r '.[]'|sed ':a;N;s/\n/|/;t a;'`
# Pauses (seconds) after text messages and after image batches.
sleepText=$(jq .sleep.text "$config_file")
sleepImage=$(jq .sleep.image "$config_file")

# ---- Announce the run ----
curl -v -d "chat_id=$chat_id" -d parse_mode=HTML -d text="Pixiv排行榜已更新,$sleepText秒后开始处理<a href='https://www.pixiv.net/ranking.php?mode%3D$mode%26content%3D$content'>$today日榜</a>数据。#date$_today #日期$_today %0A%0A<strong>排名是什么?</strong>%0A排名是以pixiv上所有公开作品为对象的统计以及排名。%0A毎日0:00~23时59分59秒的阅览树・「赞!」数等为排名的依据,期结果由pixiv独自的算法「pixiv rank β」决定。统计结果于每日中午12:00公开。%0A<a href='https://www.pixiv.help/hc/zh-cn/categories/360001065093-%E6%9C%89%E5%85%B3%E6%8E%92%E8%A1%8C%E6%A6%9C'>有关排行榜</a>" "$baseApi/sendMessage"
sleep "$sleepText"

# ---- Fetch today's ranking (cached per day as $today.json) ----
if [ ! -f "$rank_json" ]
then
  echo "get data from $rank_url"
  if ! curl -v "$rank_url" >"$rank_json"
  then
    # Do not leave a truncated/empty cache file behind: it would be reused
    # by every later run on the same day.
    echo "failed to download $rank_url" >&2
    rm -f -- "$rank_json"
  fi
fi
length=$(jq '.contents|length' "$rank_json")

# ---- State shared with the processing loop ----
fileCountSize=0
fileList=''
media=''
fileCount=0
# Number of images sent per Telegram media group.
maxFileCount=10
tarFile=$today.tar.gz
# Originals larger than this many bytes are compressed before upload.
maxFileSize=20971520
maxFileSize_M="$((maxFileSize/1024/1024))M"
start_rank=''
end_rank=''
maxLikeCount=0
maxLikeCountPid=0
maxBookmarkCount=0
maxBookmarkCountPid=0
maxViewCount=0
maxViewCountPid=0
# Previews whose long side / short side ratio exceeds this are replaced by a placeholder.
maxRatio=10
# Main loop: walk the ranking entries in order. For every artwork
#   1. fetch (and cache) its page HTML and the JSON blob embedded in it,
#   2. download every page of the artwork, compress oversized originals on the
#      remote host and upload them to OSS,
#   3. fetch a preview through the OSS image service,
#   4. add the first page to the pending Telegram album; once maxFileCount
#      items are collected, send the album followed by a summary message.
#
# Attachment arguments (-F name=@file) of the album being built. An array keeps
# each argument intact instead of re-parsing a command string with `bash -c`.
media_files=()
for ((index = 0; index < length; index++))
do
  pid=$(jq --argjson index "$index" '.contents[$index].illust_id' "$rank_json")
  artworkLink="https://www.pixiv.net/artworks/$pid"
  rank=$(jq --argjson index "$index" '.contents[$index].rank' "$rank_json")
  yes_rank=$(jq --argjson index "$index" '.contents[$index].yes_rank' "$rank_json")
  # Remember the rank of the 1st and 10th entry of each group of ten for the
  # summary message sent after the album.
  if (( (index + 1) % 10 == 1 ))
  then
    start_rank=$rank
  fi
  if (( (index + 1) % 10 == 0 ))
  then
    end_rank=$rank
  fi
  # yes_rank 0 is reported as "first appearance"; otherwise the earlier rank is shown.
  if [ "$yes_rank" -eq 0 ]
  then
    rank_info="\#排名$rank \#rank$rank \#首次登场"
  else
    rank_info="\#排名$rank \#rank$rank 之前 \#排名$yes_rank \#rank$yes_rank"
  fi
  echo "pid=$pid,artworkLink=$artworkLink,rank_info=$rank_info"

  # Artwork page and the metadata JSON extracted from it, both cached on disk.
  png_html_file=$pid.html
  if [ ! -f "$png_html_file" ]
  then
    echo "get data from $artworkLink"
    curl -v "$artworkLink" >"$png_html_file"
  fi
  json_file=$pid.json
  if [ ! -f "$json_file" ]
  then
    grep -Eo "content='{\"timestamp.*].{3}" "$png_html_file" | sed -e "s/content='//" >"$json_file"
  fi

  pageCount=$(jq --arg pid "$pid" '.illust[$pid].pageCount' "$json_file")
  original_url=$(jq -r --arg pid "$pid" '.illust[$pid].urls.original' "$json_file")
  small_url=$(jq -r --arg pid "$pid" '.illust[$pid].urls.small' "$json_file")
  # The sed stages below stay in backticks on purpose: backquote processing
  # halves "\\" before sed sees the script, so the text must not be moved into
  # $(...) unchanged. The first one escapes double quotes for the JSON caption.
  title=`jq -r --arg pid "$pid" '.illust[$pid].title' "$json_file"|sed -e 's/\"/\\\"/g'`
  # Titles, user names and tags come from the remote site: feed them to the
  # rule pipeline on stdin instead of splicing them into the shell code.
  title=$(printf '%s\n' "$title" | bash -c "$rule")
  description=$(jq -r --arg pid "$pid" '.illust[$pid].description' "$json_file")
  userName=`jq -r --arg pid "$pid" '.illust[$pid].userName' "$json_file"|sed -e 's/\"/\\\"/g'`
  userName=$(printf '%s\n' "$userName" | bash -c "$rule")
  userId=$(jq -r --arg pid "$pid" '.illust[$pid].userId' "$json_file")

  # Track the artwork with the highest like / bookmark / view count.
  likeCount=$(jq --arg pid "$pid" '.illust[$pid].likeCount' "$json_file")
  if [ "$likeCount" -gt "$maxLikeCount" ]
  then
    maxLikeCount=$likeCount
    maxLikeCountPid=$pid
  fi
  bookmarkCount=$(jq --arg pid "$pid" '.illust[$pid].bookmarkCount' "$json_file")
  if [ "$bookmarkCount" -gt "$maxBookmarkCount" ]
  then
    maxBookmarkCount=$bookmarkCount
    maxBookmarkCountPid=$pid
  fi
  viewCount=$(jq --arg pid "$pid" '.illust[$pid].viewCount' "$json_file")
  if [ "$viewCount" -gt "$maxViewCount" ]
  then
    maxViewCount=$viewCount
    maxViewCountPid=$pid
  fi

  # One tag per line, each prefixed by the first sed stage, then joined with spaces.
  tag=`jq -r --arg pid "$pid" '.illust[$pid].tags.tags[].tag' "$json_file"|sed -e 's/^/\\#/g'|sed ':a;N;s/\n/ /;t a;'`
  tag=$(printf '%s\n' "$tag" | bash -c "$rule")
  echo -e "pageCount=$pageCount,original_url=$original_url,small_url=$small_url\ntitle=$title,description=$description,userName=$userName\nlikeCount=$likeCount,bookmarkCount=$bookmarkCount,viewCount=$viewCount\ntag=$tag"

  for ((page = 0; page < pageCount; page++))
  do
    # Page URLs are derived by replacing the first "p0" of the page-0 URL.
    page_original_url=$(echo "$original_url" | sed -e "s/p0/p$page/")
    original_file_name=$(echo "$page_original_url" | grep -Eo -- "$pid.*")
    if [[ -z $original_file_name ]]
    then
      echo "cannot derive a file name from url=$page_original_url, skipping page $page of pid=$pid" >&2
      continue
    fi
    webp_file_name=$(echo "$original_file_name"|sed 's/jpg/webp/'|sed 's/png/webp/')

    # Download + upload the original unless the db already lists exactly this name.
    if ! grep -Fxq -- "$original_file_name" "$db_file"
    then
      echo "download image file name=$original_file_name,url=$page_original_url"
      if [ ! -f "$original_file_name" ]
      then
        curl -v -H 'referer: https://www.pixiv.net/' "$page_original_url" -o "$original_file_name"
      fi
      original_file_size=$(du -b "$original_file_name"|awk '{print $1}')
      if [ "$original_file_size" -gt "$maxFileSize" ]
      then
        echo "图片:$original_file_name 体积:$original_file_size 超过 $maxFileSize_M,需要压缩"
        # Round trip to the compression host: copy up, shrink in place, copy back.
        # NOTE(review): jpegoptim documents --size in kilobytes or percent;
        # confirm that the "M" suffix in $maxFileSize_M is honoured.
        scp -i ~/.ssh/"$CompressHost" -P "$CompressPort" "$original_file_name" "$CompressUser@$CompressHost:$CompressPath/$original_file_name"
        ssh -i ~/.ssh/"$CompressHost" -p "$CompressPort" "$CompressUser@$CompressHost" "cd $CompressPath;jpegoptim --size=$maxFileSize_M $original_file_name"
        scp -i ~/.ssh/"$CompressHost" -P "$CompressPort" "$CompressUser@$CompressHost:$CompressPath/$original_file_name" "$original_file_name"
        echo "图片:$original_file_name 压缩体积:$(du -h "$original_file_name")"
      fi
      upload "$original_file_name"
    fi

    # Preview image produced by the OSS image service.
    if [ ! -f "$webp_file_name" ]
    then
      webp_url="https://$bucketname.$accelerateHost/$original_file_name$imageParam"
      echo "download image file name=$webp_file_name,url=$webp_url"
      curl -v "$webp_url" -o "$webp_file_name"
      # 9th word of `file` output, comma stripped (expected to look like WIDTHxHEIGHT).
      image_resolution=$(file "$webp_file_name"|awk '{print $9}'|sed -e 's/,//')
      image_ratio=$(printf '%s\n' "$image_resolution"|sed '/^$/d'|awk '{split($0,a,"x");f=a[1]>a[2]?a[1]/a[2]:a[2]/a[1];print f}')
      # Numeric comparison through awk: the ratio is fractional, and `>` inside
      # [[ ]] compares strings (so "2" would sort after "10").
      if [[ -n $image_ratio ]] && awk -v ratio="$image_ratio" -v limit="$maxRatio" 'BEGIN { exit !(ratio + 0 > limit + 0) }'
      then
        echo "图片分辨率${image_resolution}比率:$image_ratio>$maxRatio,视为异常,返回异常图片"
        # Replace the preview with a placeholder image watermarked with the pid.
        watermark=$(echo "画作pid:$pid"|base64)
        url="https://$bucketname.$accelerateHost/error.jpeg?x-oss-process=image/watermark,text_${watermark},g_north/watermark,text_5YiG6L6o546H5q%2BU5L6L5byC5bi4Cg==,g_center/watermark,text_5bi46KeB5LqO6LaF6ZW_5Zu-,g_south"
        echo "文字水印请求url:$url"
        curl -v "$url" -o "$webp_file_name"
      fi
    fi

    # Only the first page of an artwork is added to the Telegram album.
    if [ "$page" -eq 0 ]
    then
      info_file=$original_file_name.info
      curl -v --output "$info_file" "https://$bucketname.$Host/$original_file_name?x-oss-process=image/info"
      FileSize=$(fileSizeStr "$(jq -r .FileSize.value "$info_file")")
      ImageHeight=$(jq -r .ImageHeight.value "$info_file")
      ImageWidth=$(jq -r .ImageWidth.value "$info_file")
      # Every item is appended with a leading comma; the first one is stripped when sending.
      media="$media,{\"type\":\"photo\",\"media\":\"attach://$webp_file_name\",\"parse_mode\":\"HTML\",\"caption\":\"$rank_info\n<a href=\\\"$artworkLink\\\">$title</a>\n<a href=\\\"https://www.pixiv.net/users/$userId\\\">$userName</a>\n$tag\n原图分辨率:${ImageWidth}X${ImageHeight}(宽X高),文件体积:$FileSize,<a href=\\\"https://$bucketname.$Host/$original_file_name\\\">预览原图</a> <a href=\\\"https://$bucketname.$accelerateHost/$original_file_name\\\">加速下载原图</a>\"}"
      media_files+=(-F "$webp_file_name=@$webp_file_name")
      fileCount=$((fileCount + 1))
      fileSize=$(du "$webp_file_name" | awk '{print $1}')
      fileCountSize=$((fileCountSize + fileSize))
      echo "fileCountSize=$fileCountSize,fileCount=$fileCount"
    fi

    # Album full: send it, then the rank-range summary, then start a new album.
    if [[ $fileCount -eq $maxFileCount ]]
    then
      echo "上传图片命令:curl -v -F chat_id=$chat_id ${media_files[*]} --form-string media=[${media#,}] $baseApi/sendMediaGroup"
      # --form-string sends the JSON literally: a ';' or quote inside a title
      # cannot be taken for curl form syntax or for shell code.
      curl -v -F "chat_id=$chat_id" "${media_files[@]}" --form-string "media=[${media#,}]" "$baseApi/sendMediaGroup"
      sleep "$sleepImage"
      curl -v -d "chat_id=$chat_id" -d text="以上作品日榜排名分别是 #排名${start_rank}_${end_rank} #rank${start_rank}_${end_rank} ,点击作品可以查看pid/标题/画师/tag信息." "$baseApi/sendMessage"
      sleep "$sleepText"
      fileCountSize=0
      media_files=()
      media=''
      fileCount=0
    fi
  done
done
# Closing message: look up the next expected run in Cronitor (epoch seconds in
# .next_expected_at), format it as local date/time and tell the chat that
# today's push is complete.
next_expected_at=$(curl -v "https://cronitor.io/api/monitors/$CronitorJobName" -u "$CronitorKey:" | jq .next_expected_at)
next_push_time=$(date -d "@$next_expected_at" '+%Y-%m-%d %H:%M:%S')
curl -v \
  -d "chat_id=$chat_id" \
  -d text="以上就是$today日榜前${length}名作品,本次推送完毕,下次推送时间预计是${next_push_time},如有问题请联系管理员。 #date$_today #日期$_today " \
  "$baseApi/sendMessage"