#!/bin/bash
#
# TT(Y)zara, a Tristan Tzara-style dadaist poetry generator.
#
# Copyleft (ↄ) 2019 jkirchartz
#
# Distributed under terms of the NPL (Necessary Public License) license.
#
# Usage: run with no arguments; a poem is printed to stdout, followed by an
# attribution line naming the URL the words were taken from.
#
# Needs lynx and shuf on PATH (plus wget for the Wikipedia source and the
# usual rev/cut/fold/sed/grep/tr text tools).

# set -o xtrace

# Without these two tools every fetch fails and the retry loop below would
# never terminate, so bail out early with a diagnostic instead.
for tool in lynx shuf; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    printf 'ttyzara: required command not found: %s\n' "$tool" >&2
    exit 1
  fi
done

# Print the last space-separated field of each input line; in `lynx -dump`
# link listings that field is the URL.
last_field() {
  rev | cut -d' ' -f1 | rev
}

# Dump a page as text with lynx.
# Arguments: $1 - URL to fetch; remaining arguments are extra lynx options
#            (e.g. -nolist) placed before -dump.
# Returns:   1 without calling lynx when the URL is empty, so that a failed
#            link pick yields empty content and the caller retries.
fetch_page() {
  local page_url=$1
  shift
  [[ -n "$page_url" ]] || return 1
  lynx "$@" -dump "$page_url"
}

# Clean the fetched article and turn it into one long line of shuffled words.
# Globals: content (read)
shuffled_words() {
  # 1: printf: emit the article verbatim (no backslash-escape interpretation)
  # 2: sed:    remove lynx-syntax [bracketed] images/links
  # 3: sed:    drop bullet lines (optional blanks followed by '*')
  # 4: grep:   only keep runs of letters, ' and -
  # 5: shuf:   put the words in a bag and shake
  # 6: tr:     convert to one long line
  printf '%s\n' "$content" \
    | sed -e 's/\[[^]]*\]//g' \
    | sed -e '/^[[:blank:]]*\*/d' \
    | grep -o -E "[A-Za-z'-]+" \
    | shuf \
    | tr '\n' ' '
}

# grab an article from a randomly chosen source; retry with a fresh draw
# until one of them yields some text
url=""
content=""
while [ -z "$content" ]; do
  num=$(shuf -i0-11 -n1)
  case "$num" in
    11)
      # Thread Reader
      url=$(lynx -dump -listonly https://threadreaderapp.com/thread/recent \
        | grep '/thread/[0-9]' | last_field | shuf -n1)
      content=$(fetch_page "$url" -nolist \
        | sed -n '/(BUTTON) My Authors/,/• • •/{//!p}')
      ;;
    10)
      # Flowers of Evil
      url=$(lynx -dump https://fleursdumal.org/alphabetical-listing \
        | grep "https.*poem" | last_field | shuf -n1)
      content=$(fetch_page "$url" -nolist \
        | sed -n '/Fleurs du mal/,/Navigation/{//!p}')
      ;;
    9)
      # Public Domain Poetry
      url=$(lynx -dump https://www.public-domain-poetry.com/ \
        | last_field | grep "https://www.*[[:digit:]]" | tail -n +2 | shuf -n1)
      # Link markers are kept here (no -nolist): the range start 'By [' needs them.
      content=$(fetch_page "$url" | sed -n '/By \[/,/Extra Info:/{//!p}')
      ;;
    8)
      # Textfiles.com
      url=$(lynx -dump "https://github.com/opsxcq/mirror-textfiles.com/search?l=Text&p=$(shuf -i1-100 -n1)" \
        | grep blob | shuf -n1 \
        | sed -e 's/.*textfiles\.com/http:\/\/textfiles\.com/')
      content=$(fetch_page "$url" -nolist)
      ;;
    7)
      # Wikipedia
      url=$(wget -qSO- https://en.wikipedia.org/wiki/Special:Random 2>&1 \
        | grep Location | head -n1 | last_field)
      content=$(fetch_page "$url" -nolist \
        | sed -n '/Contents/,/Categories:/{//!p}')
      ;;
    6)
      # NPR
      url=$(lynx -dump -listonly https://text.npr.org/ \
        | grep "[0-9][0-9][0-9][0-9][0-9]" | last_field | shuf -n 1)
      content=$(fetch_page "$url" -nolist \
        | sed -n '/_______/,/Topics\n/{//!p}')
      ;;
    # [6]) RETIRED: some pages return PDFs or other useless data
    #   # 391.org dada manifestos
    #   url=$(lynx -dump https://391.org/manifestos/page/$(shuf -i2-8 -n1)/ | grep manifestos/[0-9] | grep -v page | rev | cut -d' ' -f1 | rev | shuf -n1)
    #   content=$(lynx -nolist -dump $url | sed -n '/Manifesto\s+\n\s+by/,/Published in/{//!p}')
    #   ;;
    5)
      # The Anarchist Library
      url=$(lynx -dump -listonly "https://theanarchistlibrary.org/stats/popular/$(shuf -i1-638 -n1)" \
        | grep '/library/' | last_field | shuf -n1)
      content=$(fetch_page "$url" -nolist \
        | sed -n '/* Add a new text/,/Random.*RSS feed.*Titles/{//!p}')
      ;;
    # RETIRED: http errors?
    # [5])
    #   # Reuters
    #   url=$(lynx -dump https://www.reuters.com | grep article | rev | cut -d' ' -f1 | rev | shuf -n1);
    #   content=$(lynx -nolist -dump "$url" | sed -n '/Slideshow/,/About the Author/{//!p}')
    #   ;;
    4)
      # Christian Science Monitor
      url=$(lynx -dump https://www.csmonitor.com/layout/set/text/textedition \
        | grep '/20' | last_field | shuf -n1)
      content=$(fetch_page "$url" -nolist \
        | sed -n '/By[^|]+\|/,/Full HTML version/{//!p}')
      ;;
    3)
      # CNN
      url=$(lynx -dump https://lite.cnn.com \
        | grep article | last_field | shuf -n1)
      content=$(fetch_page "$url" -nolist \
        | sed -n '/Source:/,/_____/{//!p}')
      ;;
    2)
      # Folding Story
      url=$(lynx -dump http://foldingstory.com/read/ \
        | grep http | tail -n 10 | last_field | shuf -n1)
      # ${url:+...} stays empty when no story link was found, so fetch_page
      # refuses to run instead of asking lynx for the bare path "/full/".
      content=$(fetch_page "${url:+${url}/full/}" -nolist \
        | sed -n '/Paragraph/,/\* Started by/{//!p}')
      ;;
    1)
      # Popular Mechanics
      url=$(lynx -listonly -dump https://www.popularmechanics.com/ \
        | grep "[a-z][0-9][0-9][0-9][0-9][0-9]" | last_field | sort -u | shuf -n 1)
      content=$(fetch_page "$url" -nolist \
        | sed -n '/Type keyword(s)/,/(BUTTON)/{//!p}')
      ;;
    *)
      # Dreams (the URL is quoted: it contains '?', a glob character)
      url=$(lynx -dump "http://www.dreamjournal.net/main/dreams.cfm?timeframe=month" \
        | grep /journal/ | grep -v /user/ | shuf -n1 | last_field)
      content=$(fetch_page "$url" -nolist \
        | sed -n '/Views:/,/Themes\n/{//!p}')
      ;;
  esac
done

# Build the stanza script for sed: each "n;...;G;" group prints a run of
# lines and then appends a blank line, i.e. one stanza of 3-6 lines.
# A C-style loop is required here: brace expansion ({1..$n}) happens before
# variable expansion and would make the loop body run exactly once.
linepattern=''
numpatterns=$(shuf -i2-5 -n1)
for ((i = 0; i < numpatterns; i++)); do
  case "$(shuf -i1-5 -n1)" in
    4) linepattern+='n;n;n;n;n;G;' ;;
    3) linepattern+='n;n;n;G;' ;;
    2) linepattern+='n;n;G;' ;;
    *) linepattern+='n;n;n;n;G;' ;;
  esac
done

# generate poem
#
# pipe-by-pipe rundown (after shuffled_words has produced one long line):
# Pull out words (not quite one-at-a-time, but forced into the shape of a poem)
#   fold: fold text into long lines (mindful not to split words)
#   shuf: shuffle lines, only return a few of them
#   tr:   (first variant only) join the kept lines back into one line
#   fold: fold text into short lines (mindful not to split words)
#   sed:  split text into stanzas 3-6 lines long
if ((RANDOM % 2)); then
  shuffled_words \
    | fold -sw "$(shuf -i 20-90 -n1)" \
    | shuf -n "$(shuf -i 3-20 -n1)" \
    | tr '\n' ' ' \
    | fold -sw "$(shuf -i 20-60 -n1)" \
    | sed "${linepattern}"
else
  shuffled_words \
    | fold -sw "$(shuf -i 60-100 -n1)" \
    | shuf -n "$(shuf -i 3-10 -n1)" \
    | fold -sw "$(shuf -i 13-30 -n1)" \
    | sed "${linepattern}"
fi

# Attribution; the URL is passed as data so backslashes in it stay literal.
printf '\n\n by T.T(Y)zara\n (from %s)\n' "$url"