wget --spider --no-check-certificate --force-html -r -l2 https://website.of.interest.com 2>&1 | grep '^--' | awk '{ print $3 }' | grep -v '\.\(css\|js\|png\|gif\|jpg\)$' >> url-list.txt

which will give something like:

https://website.of.interest.com/en/Category:BRA
https://website.of.interest.com/en/Category:Free_software
https://website.of.interest.com/en/Category:Freeware
https://website.of.interest.com/en/Special:RecentChangesLinked/Software_overview
https://website.of.interest.com/en/Special:WhatLinksHere/Software_overview
https://website.of.interest.com/en/Talk:Software_overview
https://website.of.interest.com/ithelp/index.php?title=Software_overview&action=edit
https://website.of.interest.com/ithelp/index.php?title=Software_overview&action=history
https://website.of.interest.com/ithelp/index.php?title=Software_overview&action=info
https://website.of.interest.com/ithelp/index.php?title=Software_overview&oldid=2586
https://website.of.interest.com/ithelp/index.php?title=Special:Pdfprint&page=Software_overview
https://website.of.interest.com/ithelp/index.php?title=Special:UserLogin&returnto=Software+overview
https://website.of.interest.com/ithelp/index.php?title=Talk:Software_overview&action=edit&redlink=1