travisjneuman
diff --git a/‎projects/modules/01-web-scraping/01-fetch-a-webpage/tests/test_project.py‎
Lines changed: 135 additions & 0 deletions b/‎projects/modules/01-web-scraping/01-fetch-a-webpage/tests/test_project.py‎
Lines changed: 135 additions & 0 deletions
diff --git a/‎projects/modules/01-web-scraping/02-parse-html/tests/test_project.py‎
Lines changed: 152 additions & 0 deletions b/‎projects/modules/01-web-scraping/02-parse-html/tests/test_project.py‎
Lines changed: 152 additions & 0 deletions
@@ -0,0 +1,135 @@
+"""Tests for Module 01 / Project 01 — Fetch a Webpage.
+
+These tests verify that fetch_page() and display_response_info() work
+correctly WITHOUT making real HTTP requests. We use unittest.mock.patch
+to replace requests.get() with a fake that returns controlled data.
+
+WHY mock HTTP requests?
+- Tests should be fast and not depend on network access.
+- The real server might be slow, down, or rate-limit us.
+- Mocking lets us test our code in isolation from external services.
+"""
+
+import sys
+import os
+from unittest.mock import patch, MagicMock
+
+import pytest
+
+# Add the project directory to the Python path so we can import project.py.
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+from project import fetch_page, display_response_info
+
+
+# ---------------------------------------------------------------------------
+# Helpers: create fake response objects
+# ---------------------------------------------------------------------------
+
+def make_fake_response(status_code=200, text="<html>Hello</html>", headers=None):
+    """Build a MagicMock that behaves like a requests.Response object.
+
+    MagicMock is a flexible fake object — you can set any attribute on it,
+    and it will just work. This is much simpler than creating a real Response.
+
+    Args:
+        status_code: Value exposed as ``response.status_code``.
+        text: Value exposed as ``response.text``.
+        headers: Mapping exposed as ``response.headers``. When omitted (None),
+            a default HTML Content-Type header is used. An explicit empty
+            dict is kept as-is, so tests can simulate a header-less response.
+
+    Returns:
+        A MagicMock with status_code, text, and headers set.
+    """
+    response = MagicMock()
+    response.status_code = status_code
+    response.text = text
+    # Compare against None rather than relying on truthiness: `headers or {...}`
+    # would silently replace a deliberately empty dict with the default.
+    if headers is None:
+        headers = {"Content-Type": "text/html; charset=utf-8"}
+    response.headers = headers
+    return response
+
+
+# ---------------------------------------------------------------------------
+# Tests for fetch_page()
+# ---------------------------------------------------------------------------
+
+@patch("project.requests.get")
+def test_fetch_page_returns_response_object(mock_get):
+    """fetch_page() hands back exactly what requests.get() produced.
+
+    Two things are verified:
+    1. requests.get() receives the URL given to fetch_page().
+    2. The caller gets the response object itself (not .text, not .json()).
+    """
+    target_url = "http://example.com"
+    mock_get.return_value = make_fake_response()
+
+    returned = fetch_page(target_url)
+
+    # Exactly one call, with only our URL as the argument.
+    mock_get.assert_called_once_with(target_url)
+
+    # Identity check: the very same object must come back, not a copy.
+    assert returned is mock_get.return_value
+
+
+@patch("project.requests.get")
+def test_fetch_page_passes_through_error_status(mock_get):
+    """fetch_page() still returns the response when the status is not 200.
+
+    Raising on error statuses is not this function's job — the caller
+    (main) deals with that. Here we confirm a 404 is reported unchanged.
+    """
+    mock_get.return_value = make_fake_response(status_code=404, text="Not Found")
+
+    returned = fetch_page("http://example.com/missing")
+
+    assert returned.status_code == 404
+
+
+# ---------------------------------------------------------------------------
+# Tests for display_response_info()
+# ---------------------------------------------------------------------------
+
+def test_display_response_info_prints_status_code(capsys):
+    """display_response_info() includes the response's status code in its output.
+
+    The capsys fixture from pytest captures everything written to stdout,
+    so after calling the function we can inspect what was printed.
+    """
+    display_response_info(make_fake_response(status_code=200, text="A" * 600))
+
+    captured = capsys.readouterr()
+    assert "200" in captured.out
+
+
+def test_display_response_info_prints_content_type(capsys):
+    """display_response_info() includes the Content-Type header in its output."""
+    response_headers = {"Content-Type": "text/html"}
+
+    display_response_info(make_fake_response(headers=response_headers))
+
+    captured = capsys.readouterr()
+    assert "text/html" in captured.out
+
+
+def test_display_response_info_prints_content_length(capsys):
+    """display_response_info() reports how many characters the body contains.
+
+    The body is built with a known length (42), and that number must show
+    up somewhere in the printed output.
+    """
+    display_response_info(make_fake_response(text="x" * 42))
+
+    captured = capsys.readouterr()
+    assert "42" in captured.out
+
+
+def test_display_response_info_shows_preview(capsys):
+    """display_response_info() should show the first 500 characters of the body.
+
+    If the body is longer than 500 characters, only the first 500 should appear.
+    We check both halves of that contract: some body text is printed, and the
+    complete over-long body is not.
+    """
+    body = "Hello World! " * 100  # 1300 chars — much longer than 500
+    fake = make_fake_response(text=body)
+
+    display_response_info(fake)
+
+    output = capsys.readouterr().out
+    # The preview should contain text from the body.
+    assert "Hello World!" in output
+    # Truncation check: the full 1300-character body must not be echoed.
+    # Without this, a function that prints the entire body would still pass.
+    assert body not in output
@@ -0,0 +1,152 @@
+"""Tests for Module 01 / Project 02 — Parse HTML.
+
+These tests verify that fetch_page() and parse_books() correctly fetch
+and parse HTML from books.toscrape.com. All HTTP requests are mocked —
+we provide sample HTML that mirrors the real site's structure.
+
+WHY provide sample HTML?
+- We control exactly what the parser receives, so tests are deterministic.
+- If the real site changes its HTML, our parser tests still pass because
+  we are testing OUR parsing logic, not the external site's stability.
+"""
+
+import sys
+import os
+from unittest.mock import patch, MagicMock
+
+import pytest
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+from project import fetch_page, parse_books, display_books
+
+
+# ---------------------------------------------------------------------------
+# Sample HTML that mirrors the structure of books.toscrape.com
+# ---------------------------------------------------------------------------
+
+# Fixture passed to parse_books() in the tests in this file. It contains
+# exactly two <article class="product_pod"> entries; each has an <h3><a> whose
+# title attribute matches its visible text, and a <p class="price_color">
+# holding the price with its £ symbol.
+SAMPLE_HTML = """
+<html>
+<body>
+<article class="product_pod">
+  <h3><a href="catalogue/book1.html" title="Test Book One">Test Book One</a></h3>
+  <p class="price_color">£51.77</p>
+</article>
+<article class="product_pod">
+  <h3><a href="catalogue/book2.html" title="Test Book Two">Test Book Two</a></h3>
+  <p class="price_color">£23.99</p>
+</article>
+</body>
+</html>
+"""
+
+
+# ---------------------------------------------------------------------------
+# Tests for fetch_page()
+# ---------------------------------------------------------------------------
+
+@patch("project.requests.get")
+def test_fetch_page_returns_text_on_success(mock_get):
+    """fetch_page() yields response.text for a 200 response.
+
+    The raw HTML string is what parse_books() consumes, so a successful
+    fetch (200) must produce that text; a failed one produces None.
+    """
+    page_source = "<html>OK</html>"
+    # Keyword arguments to MagicMock set the attributes on the fake response.
+    mock_get.return_value = MagicMock(status_code=200, text=page_source)
+
+    fetched = fetch_page("http://example.com")
+
+    assert fetched == "<html>OK</html>"
+
+
+@patch("project.requests.get")
+def test_fetch_page_returns_none_on_failure(mock_get):
+    """fetch_page() yields None for any non-200 HTTP status.
+
+    None signals to the caller that the fetch failed, so parsing should
+    be skipped and an error message shown instead.
+    """
+    # Only the status matters here; every other attribute stays auto-mocked.
+    mock_get.return_value = MagicMock(status_code=500)
+
+    fetched = fetch_page("http://example.com")
+
+    assert fetched is None
+
+
+# ---------------------------------------------------------------------------
+# Tests for parse_books()
+# ---------------------------------------------------------------------------
+
+def test_parse_books_extracts_correct_count():
+    """parse_books() finds every <article class='product_pod'> element.
+
+    The sample HTML holds 2 such articles, so 2 books are expected.
+    """
+    expected_count = 2
+
+    assert len(parse_books(SAMPLE_HTML)) == expected_count
+
+
+def test_parse_books_extracts_titles():
+    """parse_books() reads each book title from the <a> tag's title attribute.
+
+    The title attribute carries the full title (visible link text may be
+    truncated).
+    """
+    parsed = parse_books(SAMPLE_HTML)
+    expected_titles = ("Test Book One", "Test Book Two")
+
+    # Position 0 of each result is the title; check them in document order.
+    for position, expected in enumerate(expected_titles):
+        assert parsed[position][0] == expected
+
+
+def test_parse_books_extracts_prices():
+    """parse_books() reads each price from the price_color <p> tag.
+
+    The currency symbol (£) is kept and surrounding whitespace is stripped.
+    """
+    parsed = parse_books(SAMPLE_HTML)
+    expected_prices = ("£51.77", "£23.99")
+
+    # Position 1 of each result is the price; check them in document order.
+    for position, expected in enumerate(expected_prices):
+        assert parsed[position][1] == expected
+
+
+def test_parse_books_returns_tuples():
+    """parse_books() should return a list of (title, price) tuples.
+
+    Each tuple has exactly two elements: the title string and the price string.
+    """
+    books = parse_books(SAMPLE_HTML)
+
+    # Guard against a vacuous pass: if parse_books() returned nothing, the
+    # loop below would never run and the test would succeed without checking.
+    assert books, "parse_books() returned no books for the sample HTML"
+
+    for book in books:
+        assert isinstance(book, tuple)
+        assert len(book) == 2
+
+
+def test_parse_books_empty_html():
+    """parse_books() gives back an empty list when the HTML has no articles.
+
+    Covers the edge case of a page that loads fine but lists no books.
+    """
+    page_without_books = "<html><body><p>No books here</p></body></html>"
+
+    assert parse_books(page_without_books) == []
+
+
+# ---------------------------------------------------------------------------
+# Tests for display_books()
+# ---------------------------------------------------------------------------
+
+def test_display_books_shows_all_titles(capsys):
+    """display_books() prints the title of every book it is given."""
+    catalog = [("Book A", "£10.00"), ("Book B", "£20.00")]
+
+    display_books(catalog)
+
+    printed = capsys.readouterr().out
+    for expected_title in ("Book A", "Book B"):
+        assert expected_title in printed