# upbeatBytes/tests/test_feed_images.py

from goodnews.feeds import _img_from_html, parse_feed

def test_img_from_html_finds_first_src():
    """Check ``_img_from_html`` on markup with an image, on plain text, and on ``None``."""
    markup = '<p>hi</p><img src="https://x.com/a.jpg" alt="">'
    found = _img_from_html(markup)
    assert found == "https://x.com/a.jpg"

    # Inputs without any <img> tag (including a missing body) must give None.
    for without_image in ("no images here", None):
        assert _img_from_html(without_image) is None

# Two-item RSS fixture, one source line per list entry:
#   * "Story" embeds an <img> inside a CDATA-wrapped <content:encoded> body.
#   * "NoImg" carries only a plain-text <description>.
# The XML declaration must remain the very first bytes of the document.
RSS = b"\n".join(
    [
        b'<?xml version="1.0"?><rss xmlns:content="http://purl.org/rss/1.0/modules/content/"><channel>',
        b"<item><title>Story</title><link>https://e.com/1</link>",
        b'<content:encoded><![CDATA[<p>lead</p><img src="https://e.com/photo.jpg"/> more]]></content:encoded></item>',
        b"<item><title>NoImg</title><link>https://e.com/2</link><description>just text</description></item>",
        b"</channel></rss>",
    ]
)

def test_parse_feed_pulls_image_from_content_html():
    """Check the ``image_url`` of each item ``parse_feed`` returns for the ``RSS`` fixture."""
    parsed = parse_feed(RSS)

    # First item: its content:encoded HTML contains an <img> tag.
    story = parsed[0]
    assert story.image_url == "https://e.com/photo.jpg"

    # Second item: plain-text description only, so no image is expected
    # (extraction is opportunistic: none when absent).
    no_img = parsed[1]
    assert no_img.image_url is None