materially it's just comments. still though.

This commit is contained in:
adam 2025-03-30 20:02:14 -04:00
parent 772fd6a758
commit 9bfbabc5f6

60
yt-data-pull.sh Executable file
View File

@@ -0,0 +1,60 @@
#!/bin/bash
# Batch usage: run the script once per line of fastlist.txt. $p is left
# unquoted on purpose so that each line is split into separate arguments.
#   while read p; do ./yt-data-pull.sh $p; done<fastlist.txt
#
# Load ~/.profile and ~/.bashrc into this shell before anything else runs.
source ~/.profile
source ~/.bashrc
#######################################
# Post a plain-text (m.text) message to a Matrix room.
# Globals:
#   chatTarget (read) - room identifier placed in the request URL path
#   chatToken  (read) - token sent as the access_token query parameter
# Arguments:
#   $@ - message words; joined with single spaces into one message body
# Outputs:
#   whatever curl prints (the server response) on stdout
# Returns:
#   curl's exit status, or jq's if the payload could not be built
#######################################
function chat(){
  # Join the arguments with a single space regardless of the caller's IFS.
  local IFS=' '
  local msgString="$*"
  local payload
  # One jq call yields {"msgtype": "m.text", "body": "<message>"}; jq does the
  # JSON escaping, so quotes and newlines in the message are safe.
  payload=$(jq -cn --arg msgtype 'm.text' --arg body "$msgString" '$ARGS.named') || return
  curl -XPOST \
    -H 'Content-Type: application/json' \
    -d "$payload" \
    "https://greyn.club:8448/_matrix/client/r0/rooms/$chatTarget/send/m.room.message?access_token=$chatToken"
}
# ---------------------------------------------------------------------------
# Main flow.
#   $1  videoID     YouTube video ID, appended to the watch?v= URL
#   $2  directory   working directory; created when missing, then entered
#   $3  chatToken   access token read by chat()
#   $4  chatTarget  room identifier read by chat()
# Files written inside the working directory:
#   info.json     output of yt-dlp --dump-single-json (with comments)
#   done.txt      IDs of comments already relayed; kept between runs
#   comments.csv  id, parent, timestamp, author_url, author, text per comment
# ---------------------------------------------------------------------------
videoID=$1
outDir=$2
if [[ -z "$videoID" || -z "$outDir" ]]; then
  printf 'usage: %s VIDEO_ID OUTPUT_DIR [CHAT_TOKEN CHAT_TARGET]\n' "${0##*/}" >&2
  exit 2
fi
if [[ -d "$outDir" ]]; then
  echo "$outDir exists"
else
  mkdir -p -- "$outDir" || exit 1
fi
# Stop here if the directory cannot be entered; otherwise every file below
# would be written into the caller's current directory.
pushd "$outDir" || exit 1
chatToken=$3
chatTarget=$4
videoURL="https://www.youtube.com/watch?v=$videoID"
# Two metadata-only requests using Firefox's cookies, five seconds apart.
# NOTE(review): the source does not say why these precede the real dump;
# confirm before removing them.
yt-dlp --skip-download --cookies-from-browser firefox "$videoURL"
sleep 5
yt-dlp --skip-download --cookies-from-browser firefox "$videoURL"
sleep 5
# theoretically this should work, but youtube thinks wget is a crime
# (when *you* do it. rules for thee not for me, of course.)
yt-dlp \
  --write-comments --skip-download \
  --dump-single-json \
  "$videoURL" \
  >info.json
# Bail out when the dump is empty or not a JSON object, so that a failed
# fetch does not overwrite an earlier comments.csv with an empty file.
if ! jq -e 'type == "object"' info.json >/dev/null 2>&1; then
  printf 'no usable JSON from yt-dlp for %s; nothing relayed\n' "$videoID" >&2
  exit 1
fi
# The title is the same for every comment, so read it once.
title=$(jq -r '.title' info.json)
touch done.txt
while IFS= read -r cid; do
  echo "cid: --> $cid."
  # -x -F: whole-line, literal comparison. A plain regex grep would treat an
  # ID as seen whenever it is a substring of a recorded ID, and would read
  # "." as a wildcard. "--" protects IDs that begin with "-".
  if grep -qxF -- "$cid" done.txt; then
    #already seen
    echo "already seen $cid"
    continue
  fi
  # Pass the ID with --arg rather than splicing it into the jq program text.
  commentJson=$(jq -c --arg cid "$cid" '(.comments // [])[] | select(.id == $cid)' info.json)
  # Recorded before sending, as in the original: a failed send is not retried.
  printf '%s\n' "$cid" >>done.txt
  author=$(jq -r '.author' <<<"$commentJson")
  text=$(jq -r '.text' <<<"$commentJson")
  message="[YT]/[$title]/[$author]: $text"
  # Quoted so the comment text is not word-split or glob-expanded.
  chat "$message"
done < <(jq -r '(.comments // [])[].id' info.json)
jq -r '(.comments // [])[] | [.id, .parent, .timestamp, .author_url, .author, .text] | @csv' info.json >comments.csv
popd