Skip to content

wget2

Installation

emerge --ask net-misc/wget2

Read an exported bookmark file and crawl ALL bookmarked sites

# Spider (check, don't download) every URL listed in the exported
# bookmarks file; --force-html treats the input file as HTML so the
# links inside it are extracted, -i reads URLs from the given file.
wget2 --spider --force-html -i bookmarks_5_1_22.html 

Spidering

# Recursively (-r) spider the site without saving files (--spider),
# ignoring robots.txt restrictions (--robots=off).
wget2 --robots=off --force-html --spider -r  https://www.nr1.nu

Download a specific file type

# Recursively download only HTML files (-A '*.html' -r) using custom
# credentials, referer and user-agent; keeps response headers in the
# saved files (--save-headers) and prefers HTTP/2 (--http2=on).
# NOTE(review): --auth-no-challenge sends the credentials unsolicited
# with the first request — only use against hosts you trust.
# NOTE(review): the --user-agent value has an unbalanced ')' — presumably
# a typo in the original; it is sent verbatim as written here.
wget2   \
    --method=GET  \
    --http-user='fbi@info.gov' \
    --http-password='hidden@mail.gov' \
    --referer='https://fbi.gov/secr3t/crawler' \
    --user-agent='(FBI Crawler/v1.0.1|ForRealSeriousCrime|WeCrawlingForObtainingEvidence) AppleIsMalware/v1.0)'  \
    --save-headers \
    --auth-no-challenge \
    --header="Accept-Encoding: all" \
    --secure-protocol=auto \
    --http2=on \
    --https-enforce=soft \
    -A '*.html' -r \
    https://www.nr1.nu 

Crawling as a pro (wuseman edition)

# Crawl a site "as a pro": per-category stats logs, progress output and
# tuned transfer options. Extra arguments (the URLs to crawl) are
# forwarded via "$@".
#
# Fixes over the original snippet:
#   * `command wget2` calls the real binary — a bare `wget2` inside a
#     function named wget2 recurses into the function forever
#   * comments after a `\` continuation terminate the command mid-way;
#     all comments are moved out of the argument list
#   * missing `\` continuations restored; trailing-space-after-`\` removed
#   * `-header` -> `--header` (single dash is not a valid long option)
#   * `--quote=0` -> `--quota=0` (unlimited download quota; --quote
#     is not a wget2 option)
#   * `--backups=backups` -> `--backups=3` (the option takes a number)
#   * log filename typos fixed (-stats-server.log, stats-oscp.log)
#   * placeholder values `--local/--remote-encoding=encoding` replaced
#     with a real encoding name
#   * duplicate --referer removed (the later one would simply override
#     the earlier one)
function wget2() {
    command wget2 \
        --method=GET \
        --http-user='fbi@info.gov' --http-password='hidden@mail.gov' \
        --referer='https://fbi.gov/secr3t/crawler' \
        --user-agent='(FBI Crawler/v1.0.1|ForRealSeriousCrime|WeCrawlingForObtainingEvidence) AppleIsMalware/v1.0)' \
        --adjust-extension \
        -o /home/wuseman/logs/wget2/wget2.log \
        --stats-site=h:/home/wuseman/logs/wget2/stats-site.log \
        --stats-server=h:/home/wuseman/logs/wget2/stats-server.log \
        --stats-tls=h:/home/wuseman/logs/wget2/stats-tls.log \
        --stats-ocsp=h:/home/wuseman/logs/wget2/stats-ocsp.log \
        --stats-dns=h:/home/wuseman/logs/wget2/stats-dns.log \
        --progress=bar \
        --backups=3 \
        --force-progress \
        --server-response \
        --quota=0 \
        --inet4-only \
        --tcp-fastopen \
        --dns-caching \
        --user=yourFriend \
        --password=yourFriend \
        --http-user=yourFriend \
        --http-password=yourFriend \
        --local-encoding=UTF-8 \
        --remote-encoding=UTF-8 \
        --chunk-size=10M \
        --verify-save-failed \
        --robots=off \
        --header='Accept-Charset: iso-8859-2' \
        --max-redirect=250 \
        "$@"
    # Optional extras from the original — add above "$@" if wanted:
    #   --http2-request-window=250  # max parallel streams per HTTP/2 conn (default 30)
    #   --cut-dirs=100              # strip up to 100 leading path components
    #   --unlink                    # remove file before writing, don't truncate
    #   --spider                    # check only, don't save
    #   --limit-rate=20k            # cap bandwidth at 20 kB/s
    #   --random-wait               # wait 0.5–1.5x the --wait time between requests
}

  • Comments are closed on this article!

Last update: December 4, 2022 19:31:07