Source code for PyXA.Additions.Web
""".. versionadded:: 0.1.0
Internet-related automation features that extend PyXA scripting functionality.
"""
from datetime import datetime
from typing import List, Union
import AppKit
import requests
from bs4 import BeautifulSoup
from PyXA import XABase
[docs]
class RSSFeed(XABase.XAObject):
    """A reader that downloads an RSS feed and exposes its entries.

    The feed is fetched and parsed as XML when the object is constructed.
    Use :func:`refetch` to download it again.

    .. versionadded:: 0.1.0
    """

    def __init__(self, url: Union[str, XABase.XAURL]):
        """Creates a reader for the feed at the given URL and fetches it immediately.

        :param url: The address of the feed, as a string or an :class:`XABase.XAURL`
        :type url: Union[str, XABase.XAURL]
        """
        # NOTE(review): the parent initializer is not invoked here; the xa_*
        # attributes are assigned directly instead -- confirm this is intended.
        self.xa_apsp = AppKit.NSApplication.sharedApplication()
        self.xa_wksp = AppKit.NSWorkspace.sharedWorkspace()
        self.xa_aref = None
        self.xa_sevt = None

        # Keep only the underlying URL value when handed an XAURL wrapper.
        self.url = url.url if isinstance(url, XABase.XAURL) else url

        # The initial download is the same operation as a refresh.
        self.refetch()

    def items(self) -> "RSSItemList":
        """Retrieves the entries of the feed as :class:`RSSItem` objects.

        The ``entry`` tags are returned when the feed has any; otherwise the ``item`` tags are returned.

        :return: The list of entries or items
        :rtype: RSSItemList

        :Example:

        >>> import PyXA
        >>> reader = PyXA.RSSFeed("http://ax.itunes.apple.com/WebObjects/MZStoreServices.woa/ws/RSS/topsongs/limit=10/xml")
        >>> print(reader.items())
        <<class 'PyXA.extensions.XAWeb.RSSItemList'>['Hold Me Closer - Elton John & Britney Spears', 'Only Ever Wanted - Timcast', "I Ain't Worried - OneRepublic", 'wait in the truck - HARDY & Lainey Wilson', 'Bring Me to Life - Evanescence', 'Running Up That Hill (A Deal with God) - Kate Bush', 'Beer With My Friends - Kenny Chesney & Old Dominion', 'American Pie (Full Length Version) - Don Mclean', 'She Had Me At Heads Carolina - Cole Swindell', 'You Proof - Morgan Wallen']>

        .. versionadded:: 0.1.0
        """
        # An empty result is falsy, so the "item" lookup only runs when no
        # "entry" tags were found.
        found = self.__soup.find_all("entry") or self.__soup.find_all("item")
        return self._new_element(found, RSSItemList)

    def refetch(self):
        """Sends a new GET request to the feed URL and replaces the parsed feed data with the response.

        :Example: Get the top 10 songs on iTunes every hour

        >>> import PyXA
        >>> from time import sleep
        >>> reader = PyXA.RSSFeed("http://ax.itunes.apple.com/WebObjects/MZStoreServices.woa/ws/RSS/topsongs/limit=10/xml")
        >>> while True:
        ...     reader.refetch()
        ...     print(reader.items())
        ...     sleep(3600)

        .. versionadded:: 0.1.0
        """
        # NOTE(review): no timeout is passed to requests.get, so a stalled
        # server would presumably block this call -- confirm whether a timeout
        # should be added.
        response = requests.get(self.url)
        self.__soup = BeautifulSoup(response.content, features="xml")
[docs]
class RSSItemList(XABase.XAList):
    """A wrapper around a list of RSS items/entries that provides bulk access to their fields.

    Each accessor returns one value per item, in list order. Accessors that
    read a child tag yield ``None`` for items that do not contain that tag.

    .. versionadded:: 0.1.0
    """

    def __init__(self, properties):
        super().__init__(properties, RSSItem)

    @staticmethod
    def _child_text(item, *tag_names):
        """Returns the text of the first tag in ``item`` matching one of ``tag_names`` (tried in order), or ``None``."""
        for tag_name in tag_names:
            tag = item.find(tag_name)
            if tag is not None:
                return tag.text
        return None

    @staticmethod
    def _child_markup(item, tag_name):
        """Returns the markup held by the first ``tag_name`` tag in ``item`` as a string.

        An empty string is returned when the tag is absent, so that the
        resulting content list stays index-aligned with the item list.
        """
        tag = item.find(tag_name)
        if tag is None:
            return ""
        if tag.string is not None:
            return str(tag.string)
        # `.string` is None when the tag does not hold exactly one string
        # (e.g. it has several children); serialize the children instead of
        # parsing the literal text "None".
        return "".join(str(child) for child in tag.contents)

    def xml(self) -> List[str]:
        """Gets the raw XML of each item in the list.

        :return: The list of XML strings
        :rtype: List[str]

        .. versionadded:: 0.1.0
        """
        return [str(x) for x in self.xa_elem]

    def content(self) -> "RSSItemContentList":
        """Gets the content of each item as :class:`RSSItemContent` objects.

        Items without a ``content`` tag produce an empty content object.

        :return: The list of item contents
        :rtype: RSSItemContentList

        .. versionadded:: 0.1.0
        """
        contents = [
            BeautifulSoup(self._child_markup(item, "content"), "html.parser")
            for item in self.xa_elem
        ]
        return self._new_element(contents, RSSItemContentList)

    def title(self) -> List[Union[str, None]]:
        """Gets the title of each item in the list.

        :return: The list of RSS item titles
        :rtype: List[Union[str, None]]

        .. versionadded:: 0.1.0
        """
        return [self._child_text(x, "title") for x in self.xa_elem]

    def author(self) -> List[Union[str, None]]:
        """Gets the author of each item in the list.

        :return: The list of RSS item authors
        :rtype: List[Union[str, None]]

        .. versionadded:: 0.1.0
        """
        return [self._child_text(x, "author") for x in self.xa_elem]

    def category(self) -> List[Union[str, None]]:
        """Gets the text of the category tag of each item in the list.

        :return: The list of RSS item categories
        :rtype: List[Union[str, None]]

        .. versionadded:: 0.1.0
        """
        # NOTE(review): RSSItem.category reads the tag's "label" attribute
        # rather than its text -- confirm which of the two is intended.
        return [self._child_text(x, "category") for x in self.xa_elem]

    def comments(self) -> List[Union[str, None]]:
        """Gets the comments of each item in the list.

        :return: The list of RSS item comments
        :rtype: List[Union[str, None]]

        .. versionadded:: 0.1.0
        """
        return [self._child_text(x, "comments") for x in self.xa_elem]

    def description(self) -> "RSSItemContentList":
        """Gets the description of each item as :class:`RSSItemContent` objects.

        Items without a ``description`` tag produce an empty content object.

        :return: The list of item descriptions
        :rtype: RSSItemContentList

        .. versionadded:: 0.1.0
        """
        contents = [
            BeautifulSoup(self._child_markup(item, "description"), "html.parser")
            for item in self.xa_elem
        ]
        return self._new_element(contents, RSSItemContentList)

    def enclosure(self) -> List[Union[str, None]]:
        """Gets the enclosure URL of each item in the list.

        The value is the ``url`` attribute of the item's ``enclosure`` tag, matching :attr:`RSSItem.enclosure`.

        :return: The list of RSS item enclosure URLs
        :rtype: List[Union[str, None]]

        .. versionadded:: 0.1.0
        """
        urls = []
        for item in self.xa_elem:
            tag = item.find("enclosure")
            urls.append(None if tag is None else tag.get("url"))
        return urls

    def link(self) -> List[Union[XABase.XAURL, None]]:
        """Gets the link of each item in the list, wrapping the text of each item's first ``link`` tag in an :class:`XABase.XAURL`.

        :return: The list of RSS item links
        :rtype: List[Union[XABase.XAURL, None]]

        .. versionadded:: 0.1.0
        """
        urls = []
        for item in self.xa_elem:
            text = self._child_text(item, "link")
            urls.append(None if text is None else XABase.XAURL(text))
        return urls

    def publication_date(self) -> List[Union[str, None]]:
        """Gets the publication date of each item in the list, as the unparsed text of its ``pubDate`` tag.

        :return: The list of RSS item publication dates
        :rtype: List[Union[str, None]]

        .. versionadded:: 0.1.0
        """
        return [self._child_text(x, "pubDate") for x in self.xa_elem]

    def source(self) -> List[Union[str, None]]:
        """Gets the source of each item in the list.

        :return: The list of RSS item sources
        :rtype: List[Union[str, None]]

        .. versionadded:: 0.1.0
        """
        return [self._child_text(x, "source") for x in self.xa_elem]

    def copyright(self) -> List[Union[str, None]]:
        """Gets the copyright of each item in the list.

        The ``copyright`` tag is used when present; otherwise the ``rights`` tag is used, as in :attr:`RSSItem.copyright`.

        :return: The list of RSS item copyrights
        :rtype: List[Union[str, None]]

        .. versionadded:: 0.1.0
        """
        return [self._child_text(x, "copyright", "rights") for x in self.xa_elem]

    def text(self) -> List[str]:
        """Gets the text of each item in the list.

        :return: The list of RSS item texts
        :rtype: List[str]

        .. versionadded:: 0.1.0
        """
        return [x.text for x in self.xa_elem]

    def links(self) -> List[XABase.XAURL]:
        """Gets the links contained in each item of the list as :class:`XABase.XAURL` objects.

        The per-item results are flattened into a single list.

        :return: The list of links
        :rtype: List[XABase.XAURL]

        .. versionadded:: 0.1.0
        """
        return [url for item in self for url in item.links()]

    def __repr__(self):
        return "<" + str(type(self)) + str(self.title()) + ">"
[docs]
class RSSItem(XABase.XAObject):
    """An item or entry in an RSS feed.

    Properties that read a child tag return ``None`` when the entry does not contain that tag.

    .. versionadded:: 0.1.0
    """

    def __init__(self, properties):
        super().__init__(properties)
        self.xml: str  #: The raw XML of the entry
        self.content: Union["RSSItemContent", None]  #: The parsed content of the entry
        self.author: Union[str, None]  #: The author of the entry
        self.category: Union[str, None]  #: The label of the entry's category
        self.comments: Union[str, None]  #: The comments of the entry
        self.description: Union[str, None]  #: The description of the entry
        self.enclosure: Union[str, None]  #: The URL of the media enclosed in the entry
        self.link: Union[str, None]  #: The hyperlink to the entry
        self.publication_date: Union[str, None]  #: The publication date text of the entry
        self.source: Union[str, None]  #: The third-party source of the entry
        self.title: Union[str, None]  #: The title of the RSS entry
        self.copyright: Union[str, None]  #: The copyright text of the entry
        self.text: str  #: All text within the entry (not just the description/content text!)

    def _child_text(self, tag_name: str) -> Union[str, None]:
        """Returns the text of the first ``tag_name`` tag in this entry, or ``None`` if there is none."""
        tag = self.xa_elem.find(tag_name)
        return None if tag is None else tag.text

    @property
    def xml(self) -> str:
        return str(self.xa_elem)

    @property
    def content(self) -> Union["RSSItemContent", None]:
        tag = self.xa_elem.find("content")
        if tag is None:
            return None
        if tag.string is not None:
            markup = str(tag.string)
        else:
            # `.string` is None when the tag does not hold exactly one string;
            # serialize the children instead of parsing the literal "None".
            markup = "".join(str(child) for child in tag.contents)
        content_object = BeautifulSoup(markup, "html.parser")
        return self._new_element(content_object, RSSItemContent)

    @property
    def author(self) -> Union[str, None]:
        return self._child_text("author")

    @property
    def category(self) -> Union[str, None]:
        tag = self.xa_elem.find("category")
        if tag is None:
            return None
        return tag.get("label")

    @property
    def comments(self) -> Union[str, None]:
        return self._child_text("comments")

    @property
    def description(self) -> Union[str, None]:
        tag = self.xa_elem.find("description")
        if tag is None:
            return None
        return tag.string

    @property
    def enclosure(self) -> Union[str, None]:
        tag = self.xa_elem.find("enclosure")
        if tag is None:
            return None
        return tag.get("url")

    @property
    def link(self) -> Union[str, None]:
        return self._child_text("link")

    @property
    def publication_date(self) -> Union[str, None]:
        return self._child_text("pubDate")

    @property
    def source(self) -> Union[str, None]:
        return self._child_text("source")

    @property
    def title(self) -> Union[str, None]:
        return self._child_text("title")

    @property
    def copyright(self) -> Union[str, None]:
        # Fall back to the "rights" tag when there is no "copyright" tag.
        tag = self.xa_elem.find("copyright")
        if tag is None:
            tag = self.xa_elem.find("rights")
        return None if tag is None else tag.text

    @property
    def text(self) -> str:
        # `.text` is a string property of the parsed element, not a method.
        return self.xa_elem.text

    def links(self) -> List[XABase.XAURL]:
        """Retrieves the URL referenced by each link tag as a list of :class:`XABase.XAURL` objects.

        Duplicate link tags are reported once, in the order they first appear in the entry.

        :return: The list of link URLs
        :rtype: List[XABase.XAURL]

        :Example:

        >>> import PyXA
        >>> reader = PyXA.RSSFeed("http://ax.itunes.apple.com/WebObjects/MZStoreServices.woa/ws/RSS/topsongs/limit=10/xml")
        >>> item = reader.items()[0]
        >>> print(item.links())
        [<<class 'PyXA.XABase.XAURL'>https://audio-ssl.itunes.apple.com/itunes-assets/AudioPreview122/v4/7a/24/60/7a246091-cef8-1df4-1435-a107ed3c6980/mzaf_8623157179635634843.plus.aac.p.m4a>, <<class 'PyXA.XABase.XAURL'>https://music.apple.com/us/album/hold-me-closer-single/1641082201?uo=2>, <<class 'PyXA.XABase.XAURL'>https://music.apple.com/us/album/hold-me-closer/1641082201?i=1641082205&uo=2>]

        .. versionadded:: 0.1.0
        """
        tags = self.xa_elem.find_all("link")
        # dict.fromkeys de-duplicates like set() does but keeps first-seen
        # order, so the result is stable between runs.
        # NOTE(review): only the "href" attribute is read; link tags that carry
        # their URL as text yield XAURL(None) -- confirm whether that is intended.
        return [XABase.XAURL(tag.get("href")) for tag in dict.fromkeys(tags)]

    def __repr__(self):
        # str() keeps this usable when the entry has no title tag.
        return "<" + str(type(self)) + str(self.title) + ">"
[docs]
class RSSItemContentList(XABase.XAList):
    """A wrapper around a list of parsed RSS item contents that provides bulk access to their data.

    .. versionadded:: 0.1.0
    """

    def __init__(self, properties):
        super().__init__(properties, RSSItemContent)

    def html(self) -> List[str]:
        """Gets the raw HTML of each item in the list.

        :return: The list of HTML strings
        :rtype: List[str]

        .. versionadded:: 0.1.0
        """
        return [str(x) for x in self.xa_elem]

    def text(self) -> List[str]:
        """Gets the text of each item in the list.

        :return: The list of content texts
        :rtype: List[str]

        .. versionadded:: 0.1.0
        """
        return [x.text for x in self.xa_elem]

    def links(self) -> List[XABase.XAURL]:
        """Gets the links contained in each item of the list as :class:`XABase.XAURL` objects.

        The per-item results are flattened into a single list.

        :return: The list of links
        :rtype: List[XABase.XAURL]

        .. versionadded:: 0.1.0
        """
        return [url for content in self for url in content.links()]

    def images(self) -> "XABase.XAImageList":
        """Gets the images contained in each item of the list, flattened into a single :class:`XABase.XAImageList`.

        ``img`` tags without a ``src`` attribute are skipped, since they reference no image to load.

        :return: The list of images
        :rtype: XABase.XAImageList

        .. versionadded:: 0.1.0
        """
        img_objects = []
        for content_item in self.xa_elem:
            # src=True restricts the match to tags that have a src attribute,
            # so None is never handed to NSURL.
            for img in content_item.find_all("img", src=True):
                url = AppKit.NSURL.alloc().initWithString_(img.get("src"))
                img_objects.append(AppKit.NSImage.alloc().initWithContentsOfURL_(url))
        return self._new_element(img_objects, XABase.XAImageList)

    def __repr__(self):
        return "<" + str(type(self)) + "Length: " + str(len(self)) + ">"
[docs]
class RSSItemContent(XABase.XAObject):
    """The content of an RSS entry.

    .. versionadded:: 0.1.0
    """

    def __init__(self, properties):
        super().__init__(properties)
        self.html: str  #: The raw html of the content
        self.text: str  #: The visible text of the content

    @property
    def html(self) -> str:
        return str(self.xa_elem)

    @property
    def text(self) -> str:
        return self.xa_elem.text

    def links(self) -> List[XABase.XAURL]:
        """Retrieves the URL referenced by each anchor element as a list of :class:`XABase.XAURL` objects.

        :return: The list of link URLs
        :rtype: List[XABase.XAURL]

        :Example:

        >>> import PyXA
        >>> reader = PyXA.RSSFeed("http://ax.itunes.apple.com/WebObjects/MZStoreServices.woa/ws/RSS/topsongs/limit=10/xml")
        >>> content = reader.items()[0].content
        >>> print(content.links())
        [<<class 'PyXA.XABase.XAURL'>https://music.apple.com/us/album/hold-me-closer/1641082201?i=1641082205&uo=2>, <<class 'PyXA.XABase.XAURL'>https://music.apple.com/us/album/hold-me-closer/1641082201?i=1641082205&uo=2>, <<class 'PyXA.XABase.XAURL'>https://music.apple.com/us/album/hold-me-closer-single/1641082201?uo=2>, <<class 'PyXA.XABase.XAURL'>https://music.apple.com/us/artist/elton-john/54657?uo=2>, <<class 'PyXA.XABase.XAURL'>https://music.apple.com/us/genre/music-pop/id14?uo=2>]

        .. versionadded:: 0.1.0
        """
        anchors = self.xa_elem.find_all("a")
        return [XABase.XAURL(anchor.get("href")) for anchor in anchors]

    def images(self) -> "XABase.XAImageList":
        """Retrieves the image referenced by each image element as an :class:`XABase.XAImageList`.

        ``img`` tags without a ``src`` attribute are skipped, since they reference no image to load.

        :return: The list of images
        :rtype: XABase.XAImageList

        :Example:

        >>> import PyXA
        >>> reader = PyXA.RSSFeed("http://ax.itunes.apple.com/WebObjects/MZStoreServices.woa/ws/RSS/topsongs/limit=10/xml")
        >>> content = reader.items()[0].content
        >>> print(content.images())
        [<PyXA.XABase.XAImage object at 0x10635ee80>, <PyXA.XABase.XAImage object at 0x10635ebb0>]

        .. versionadded:: 0.1.0
        """
        img_objects = []
        # src=True restricts the match to tags that have a src attribute, so
        # None is never handed to NSURL.
        for img in self.xa_elem.find_all("img", src=True):
            url = AppKit.NSURL.alloc().initWithString_(img.get("src"))
            img_objects.append(AppKit.NSImage.alloc().initWithContentsOfURL_(url))
        return self._new_element(img_objects, XABase.XAImageList)