commit a0d43cb7100de27ed0b276794ce63ad571f3e98f
Author: David Ashby
Date:   Sun May 20 17:45:39 2018 -0400

    initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b81c795
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.xml
\ No newline at end of file
diff --git a/feed.py b/feed.py
new file mode 100755
index 0000000..2580549
--- /dev/null
+++ b/feed.py
@@ -0,0 +1,115 @@
+"""Generate a podcast RSS feed of U.S. Supreme Court oral argument audio.
+
+Scrapes the argument audio listing for one term from supremecourt.gov and
+prints the resulting RSS document to standard output.
+"""
+import requests
+import argparse
+from bs4 import BeautifulSoup
+from feedgen.feed import FeedGenerator
+from dateutil import parser
+from datetime import timezone
+
+SITE = 'https://www.supremecourt.gov'
+MP3_BASE = SITE + '/media/audio/mp3files/'
+# Seconds to wait for any single HTTP response before giving up.
+REQUEST_TIMEOUT = 30
+
+
+def feedbase():
+    """Create the feed and fill in its channel-level metadata.
+
+    Reads the module globals LINK, HOME and LOGO, which the __main__ block
+    sets from the command line.
+    """
+    fg = FeedGenerator()
+    fg.load_extension('podcast')
+    fg.title('SCOTUS Audio')
+    fg.subtitle('This is an automated feed of the mp3 files from the SCOTUS website. NOT AFFILIATED WITH THE COURT OR THE JUSTICES.')
+    fg.link(href=LINK, rel='self')
+    fg.language('en')
+    if HOME:
+        fg.link(href=HOME, rel='alternate')
+    if LOGO:
+        fg.logo(LOGO)
+    return fg
+
+
+def get_filesize(argument_id):
+    """Return the mp3 size for an argument, as reported by the server.
+
+    Falls back to '0' when the response carries no Content-Length header,
+    so that one unusual response does not abort the whole feed.
+    """
+    response = requests.head(
+        MP3_BASE + argument_id + '.mp3',
+        # requests.head() does not follow redirects by default; follow them
+        # so the size comes from the file itself, not a redirect response.
+        allow_redirects=True,
+        timeout=REQUEST_TIMEOUT,
+    )
+    return response.headers.get('content-length', '0')
+
+
+def add_argument(feed, argument_id, argument_number, argument_title, argument_date, docket_number):
+    """Append one argument to the feed as an entry with an mp3 enclosure.
+
+    Reads the module global TERM to build the entry's page URL.
+    """
+    fe = feed.add_entry(order='append')
+    url = SITE + "/oral_arguments/audio/" + TERM + "/" + argument_id
+    fe.id(url)
+    fe.title(argument_number + ": " + argument_title)
+    fe.link(href=url)
+    fe.enclosure(MP3_BASE + argument_id + '.mp3', get_filesize(argument_id), 'audio/mpeg')
+    fe.published(argument_date)
+    fe.description("The Supreme Court docket for this case is available at https://www.supremecourt.gov/docket/docketfiles/html/public/" + docket_number + ".html.")
+
+
+def parse_sessions(feed, sessions):
+    """Add every argument row of every session table to the feed.
+
+    sessions is the list of tables found on the term's audio page; each
+    row after the header is expected to describe one argument.
+    """
+    for session in sessions:
+        # [:0:-1] walks the rows from last to first and stops before row 0,
+        # which pops off the header and inverts the order in one slice.
+        for argument in session.find_all("tr")[:0:-1]:
+            if argument.a is None:
+                # No link in this row, so there is no argument to add.
+                continue
+            argument_number = argument.a.string
+            if argument_number.endswith("-Orig"):
+                # "22o" is a magic docket prefix for now.
+                docket_number = "22o" + argument_number.split("-")[0]
+            else:
+                docket_number = argument_number
+            argument_id = argument.a['href'].split("/")[-1]
+            argument_title = argument.find_all("span")[1].string
+            # Mark the parsed date as UTC so the timestamp is timezone-aware.
+            argument_date = parser.parse(argument.find_all("td")[1].string).replace(tzinfo=timezone.utc)
+            add_argument(feed, argument_id, argument_number, argument_title, argument_date, docket_number)
+
+
+if __name__ == "__main__":
+    args = argparse.ArgumentParser(description='Generate an RSS feed for a particular term of the court.')
+    args.add_argument('--term', required=True, help="The term to generate the feed for.")
+    args.add_argument('--link', required=True, help="The URL of the completed feed.")
+    args.add_argument('--home', help="The landing page for the source of the audio. Suggested if using a logo.")
+    args.add_argument('--logo', help="The URL of a logo for the feed.")
+    arglist = args.parse_args()
+
+    TERM = arglist.term
+    LINK = arglist.link
+    LOGO = arglist.logo
+    HOME = arglist.home
+
+    response = requests.get(SITE + "/oral_arguments/argument_audio/" + TERM, timeout=REQUEST_TIMEOUT)
+    # Fail loudly on an HTTP error rather than print an empty feed.
+    response.raise_for_status()
+    content = response.content
+    sessions = BeautifulSoup(content, "html.parser").find_all("table", class_="table table-bordered")
+    feed = feedbase()
+    parse_sessions(feed, sessions)
+    print(feed.rss_str(pretty=True).decode('utf-8'))
diff --git a/logo.jpg b/logo.jpg
new file mode 100644
index 0000000..fc4a1a8
Binary files /dev/null and b/logo.jpg differ
diff --git a/readme.md b/readme.md
new file mode 100644
index 0000000..428a151
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,16 @@
+# SCOTUSfeed
+
+This python script outputs an RSS feed of arguments listed on [the court's oral argument recordings page](https://www.supremecourt.gov/oral_arguments/oral_arguments.aspx).
+
+## Example
+
+Run `python feed.py -h` for full command-line instructions. The bare minimum looks like `python feed.py --term 2017 --link https://example.com/scotus.xml`
+
+### Optional Arguments
+
+`--logo URL`
+`--home URL`
+
+## Logo Source
+
+`logo.jpg` comes from , which was released into the public domain.