Skip to content

Commit 7cbac13

Browse files
authored
URL slash fixes (#1614)
* add a page template for crawlers to impact stoires * move blocks around on impact page * ensure all sitemap.xml urls don't end in a slash * middleware cleanup - no trailing slashes on canonical urls * add cards to impact section of page
1 parent 548c544 commit 7cbac13

File tree

6 files changed

+191
-150
lines changed

6 files changed

+191
-150
lines changed

global_settings/views.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
import inspect
2+
from django.contrib.sitemaps import views as sitemap_views
13
from django.http import HttpResponseServerError, HttpResponse
2-
4+
from wagtail.contrib.sitemaps.sitemap_generator import Sitemap
35
from global_settings.functions import invalidate_cloudfront_caches
46

57

@@ -13,3 +15,28 @@ def clear_entire_cache(request):
1315
invalidate_cloudfront_caches()
1416
response = '<html><body><p>All Caches Invalidated</p></body></html>'
1517
return HttpResponse(response)
18+
19+
20+
def index(request, sitemaps, **kwargs):
21+
sitemaps = prepare_sitemaps(request, sitemaps)
22+
return sitemap_views.index(request, sitemaps, **kwargs)
23+
24+
25+
def sitemap(request, sitemaps=None, **kwargs):
26+
if sitemaps:
27+
sitemaps = prepare_sitemaps(request, sitemaps)
28+
else:
29+
sitemaps = {"wagtail": Sitemap(request)}
30+
return sitemap_views.sitemap(request, sitemaps, **kwargs)
31+
32+
33+
def prepare_sitemaps(request, sitemaps):
34+
initialised_sitemaps = {}
35+
for name, sitemap_cls in sitemaps.items():
36+
if inspect.isclass(sitemap_cls) and issubclass(sitemap_cls, Sitemap):
37+
sitemap_instance = sitemap_cls(request)
38+
sitemap_instance.sitemap_urls = [url.rstrip('/') for url in sitemap_instance.sitemap_urls]
39+
initialised_sitemaps[name] = sitemap_instance
40+
else:
41+
initialised_sitemaps[name] = sitemap_cls
42+
return initialised_sitemaps

openstax/middleware.py

Lines changed: 88 additions & 140 deletions
Original file line numberDiff line numberDiff line change
@@ -14,189 +14,137 @@
1414
from pages.models import HomePage, Supporters, PrivacyPolicy, K12Subject, Subject, Subjects, RootPage
1515

1616

17-
class HttpSmartRedirectResponse(HttpResponsePermanentRedirect):
18-
pass
19-
20-
2117
class CommonMiddlewareAppendSlashWithoutRedirect(CommonMiddleware):
22-
""" This class converts HttpSmartRedirectResponse to the common response
23-
of Django view, without redirect. This is necessary to match status_codes
24-
for urls like /url?q=1 and /url/?q=1. If you don't use it, you will have 302
25-
code always on pages without slash.
18+
""" This class converts HttpResponsePermanentRedirect to the common response
19+
of Django view, without redirect. This is necessary to match status_codes
20+
for urls like /url?q=1 and /url/?q=1. If you don't use it, you will have 302
21+
code always on pages without slash.
2622
"""
27-
response_redirect_class = HttpSmartRedirectResponse
23+
response_redirect_class = HttpResponsePermanentRedirect
2824

29-
def __init__(self, *args, **kwargs):
30-
# create django request resolver
31-
self.handler = BaseHandler()
25+
def __init__(self, *args, **kwargs):
26+
# create django request resolver
27+
self.handler = BaseHandler()
3228

33-
# prevent recursive includes
34-
old = settings.MIDDLEWARE
35-
name = self.__module__ + '.' + self.__class__.__name__
36-
settings.MIDDLEWARE = [i for i in settings.MIDDLEWARE if i != name]
29+
# prevent recursive includes
30+
old = settings.MIDDLEWARE
31+
name = self.__module__ + '.' + self.__class__.__name__
32+
settings.MIDDLEWARE = [i for i in settings.MIDDLEWARE if i != name]
3733

38-
self.handler.load_middleware()
34+
self.handler.load_middleware()
3935

40-
settings.MIDDLEWARE = old
41-
super(CommonMiddlewareAppendSlashWithoutRedirect, self).__init__(*args, **kwargs)
36+
settings.MIDDLEWARE = old
37+
super().__init__(*args, **kwargs)
4238

43-
def get_full_path_with_slash(self, request):
44-
""" Return the full path of the request with a trailing slash appended
45-
without Exception in Debug mode
46-
"""
47-
new_path = request.get_full_path(force_append_slash=True)
48-
# Prevent construction of scheme relative urls.
49-
new_path = escape_leading_slashes(new_path)
50-
return new_path
39+
def get_full_path_with_slash(self, request):
40+
""" Return the full path of the request with a trailing slash appended
41+
without Exception in Debug mode
42+
"""
43+
# Prevent construction of scheme relative urls.
44+
new_path = request.get_full_path(force_append_slash=True)
45+
new_path = escape_leading_slashes(new_path)
46+
return new_path
5147

52-
def process_response(self, request, response):
53-
response = super(CommonMiddlewareAppendSlashWithoutRedirect, self).process_response(request, response)
48+
def process_response(self, request, response):
49+
response = super().process_response(request, response)
5450

55-
if isinstance(response, HttpSmartRedirectResponse):
56-
if not request.path.endswith('/'):
57-
request.path = request.path + '/'
58-
# we don't need query string in path_info because it's in request.GET already
59-
request.path_info = request.path
60-
response = self.handler.get_response(request)
51+
if isinstance(response, HttpResponsePermanentRedirect):
52+
if not request.path.endswith('/'):
53+
request.path = request.path + '/'
54+
# we don't need query string in path_info because it's in request.GET already
55+
response = self.handler.get_response(request)
6156

62-
return response
57+
return response
6358

6459

6560
class CommonMiddlewareOpenGraphRedirect(CommonMiddleware):
66-
OG_USER_AGENTS = [
67-
'baiduspider',
68-
'bingbot',
69-
'embedly',
70-
'facebookbot',
71-
'facebookexternalhit/1.1',
72-
'facebookexternalhit',
73-
'facebot',
74-
'google.*snippet',
75-
'googlebot',
76-
'linkedinbot',
77-
'MetadataScraper',
78-
'outbrain',
79-
'pinterest',
80-
'pinterestbot',
81-
'quora',
82-
'quora link preview',
83-
'rogerbot',
84-
'showyoubot',
85-
'slackbot',
86-
'slackbot-linkexpanding',
87-
'twitterbot',
88-
'vkShare',
89-
'W3C_Validator',
90-
'WhatsApp',
91-
'MetadataScraper',
92-
'yandex',
93-
'yahoo',
94-
]
61+
OG_USER_AGENTS = {
62+
'baiduspider', 'bingbot', 'embedly', 'facebookbot', 'facebookexternalhit/1.1',
63+
'facebookexternalhit', 'facebot', 'google.*snippet', 'googlebot', 'linkedinbot',
64+
'MetadataScraper', 'outbrain', 'pinterest', 'pinterestbot', 'quora', 'quora link preview',
65+
'rogerbot', 'showyoubot', 'slackbot', 'slackbot-linkexpanding', 'twitterbot', 'vkShare',
66+
'W3C_Validator', 'WhatsApp', 'yandex', 'yahoo'
67+
}
9568

9669
def __init__(self, get_response):
9770
self.get_response = get_response
9871

9972
def __call__(self, request, *args, **kwargs):
10073
if 'HTTP_USER_AGENT' in request.META:
101-
10274
user_agent = user_agent_parser.ParseUserAgent(request.META["HTTP_USER_AGENT"])
10375
if user_agent['family'].lower() in self.OG_USER_AGENTS:
104-
# url path minus the trailing /
10576
url_path = unquote(request.get_full_path()[:-1])
106-
10777
full_url = unquote(request.build_absolute_uri())
108-
109-
# index of last / to find slug, except when there isn't a last /
110-
if url_path == '':
111-
page_slug = "home"
112-
else:
113-
index = url_path.rindex('/')
114-
page_slug = url_path[index+1:]
78+
page_slug = "home" if url_path == '' else url_path.rsplit('/', 1)[-1]
11579

11680
if self.redirect_path_found(url_path):
117-
# supporters page has the wrong slug
11881
if page_slug == 'foundation':
11982
page_slug = 'supporters'
12083

121-
# look up correct object based on path
122-
if '/details/books/' in url_path:
123-
page = Book.objects.filter(slug=page_slug)
124-
elif '/blog/' in url_path:
125-
page = NewsArticle.objects.filter(slug=page_slug)
126-
elif '/privacy' in url_path:
127-
page = PrivacyPolicy.objects.filter(slug='privacy-policy')
128-
elif '/k12' in url_path:
129-
page = K12Subject.objects.filter(slug='k12-' + page_slug)
130-
elif '/subjects' in url_path:
131-
flag = FeatureFlag.objects.filter(name='new_subjects')
132-
if flag[0].feature_active:
133-
if page_slug == 'subjects':
134-
page_slug = 'new-subjects'
135-
page = Subjects.objects.filter(slug=page_slug)
136-
else:
137-
page = Subject.objects.filter(slug=page_slug+'-books')
138-
else:
139-
page_slug = 'subjects'
140-
page = BookIndex.objects.filter(slug=page_slug)
141-
else:
142-
page = self.page_by_slug(page_slug)
143-
84+
page = self.get_page(url_path, page_slug)
14485
if page:
14586
template = self.build_template(page[0], full_url)
14687
return HttpResponse(template)
147-
else:
148-
return self.get_response(request)
149-
else:
150-
return self.get_response(request)
15188
return self.get_response(request)
15289

90+
def get_page(self, url_path, page_slug):
91+
if '/details/books/' in url_path:
92+
return Book.objects.filter(slug=page_slug)
93+
elif '/blog/' in url_path:
94+
return NewsArticle.objects.filter(slug=page_slug)
95+
elif '/privacy' in url_path:
96+
return PrivacyPolicy.objects.filter(slug='privacy-policy')
97+
elif '/k12' in url_path:
98+
return K12Subject.objects.filter(slug='k12-' + page_slug)
99+
elif '/subjects' in url_path:
100+
flag = FeatureFlag.objects.filter(name='new_subjects')
101+
if flag[0].feature_active:
102+
if page_slug == 'subjects':
103+
page_slug = 'new-subjects'
104+
return Subjects.objects.filter(slug=page_slug)
105+
else:
106+
return Subject.objects.filter(slug=page_slug + '-books')
107+
else:
108+
return BookIndex.objects.filter(slug='subjects')
109+
else:
110+
return self.page_by_slug(page_slug)
111+
153112
def build_template(self, page, page_url):
113+
page_url = page_url.rstrip('/')
154114
image_url = self.image_url(page.promote_image)
155-
template = '<!DOCTYPE html> <html> <head> <meta charset="utf-8">\n'
156-
template += ' <title>' + str(page.title) + '</title>\n'
157-
template += ' <meta name="description" content="{}" >\n'.format(page.search_description)
158-
template += ' <link rel="canonical" href="{}" />\n'.format(page_url)
159-
template += ' <meta property="og:url" content="{}" />\n'.format(page_url)
160-
template += ' <meta property="og:type" content="article" />\n'
161-
template += ' <meta property="og:title" content="{}" />\n'.format(page.title)
162-
template += ' <meta property="og:description" content="{}" />\n'.format(page.search_description)
163-
template += ' <meta property="og:image" content="{}" />\n'.format(image_url)
164-
template += ' <meta property="og:image:alt" content="{}" />\n'.format(page.title)
165-
template += ' <meta name="twitter:card" content="summary_large_image">\n'
166-
template += ' <meta name="twitter:site" content="@OpenStax">\n'
167-
template += ' <meta name="twitter:title" content="{}">\n'.format(page.title)
168-
template += ' <meta name="twitter:description" content="{}">\n'.format(page.search_description)
169-
template += ' <meta name="twitter:image" content="{}">\n'.format(image_url)
170-
template += ' <meta name="twitter:image:alt" content="OpenStax">\n'
171-
template += '</head><body></body></html>'
172-
return template
115+
return f'''<!DOCTYPE html>
116+
<html>
117+
<head>
118+
<meta charset="utf-8">
119+
<title>{page.title}</title>
120+
<meta name="description" content="{page.search_description}">
121+
<link rel="canonical" href="{page_url}">
122+
<meta property="og:url" content="{page_url}">
123+
<meta property="og:type" content="article">
124+
<meta property="og:title" content="{page.title}">
125+
<meta property="og:description" content="{page.search_description}">
126+
<meta property="og:image" content="{image_url}">
127+
<meta property="og:image:alt" content="{page.title}">
128+
<meta name="twitter:card" content="summary_large_image">
129+
<meta name="twitter:site" content="@OpenStax">
130+
<meta name="twitter:title" content="{page.title}">
131+
<meta name="twitter:description" content="{page.search_description}">
132+
<meta name="twitter:image" content="{image_url}">
133+
<meta name="twitter:image:alt" content="OpenStax">
134+
</head>
135+
<body></body>
136+
</html>'''
173137

174138
def redirect_path_found(self, url_path):
175-
if '/blog/' in url_path or '/details/books/' in url_path or '/foundation' in url_path or '/privacy' in url_path or '/subjects' in url_path or '' == url_path:
176-
return True
177-
elif '/k12' in url_path:
178-
last_slash = url_path.rfind('/')
179-
k12_index = url_path.rfind('k12')
180-
if last_slash < k12_index:
181-
return False
182-
else:
183-
return True
184-
else:
185-
return False
139+
return any(substring in url_path for substring in ['/blog/', '/details/books/', '/foundation', '/privacy', '/subjects', '']) or '/k12' in url_path
186140

187141
def image_url(self, image):
188-
image_url = build_image_url(image)
189-
if not image_url:
190-
return ''
191-
return image_url
142+
return build_image_url(image) or ''
192143

193144
def page_by_slug(self, page_slug):
194145
if page_slug == 'supporters':
195146
return Supporters.objects.all()
196147
if page_slug == 'openstax-homepage':
197148
return HomePage.objects.filter(locale=1)
198149
if page_slug == 'home':
199-
return RootPage.objects.filter(locale=1)
200-
201-
202-
150+
return RootPage.objects.filter(locale=1)

openstax/urls.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,14 @@
55
from wagtail.admin import urls as wagtailadmin_urls
66
from wagtail import urls as wagtail_urls
77
from wagtail.documents import urls as wagtaildocs_urls
8-
from wagtail.images.views.serve import ServeView
98
from accounts import urls as accounts_urls
109

1110
from .api import api_router
1211
from news.search import search
1312
from news.feeds import RssBlogFeed, AtomBlogFeed
1413

1514
from api import urls as api_urls
16-
from global_settings.views import throw_error, clear_entire_cache
17-
from wagtail.contrib.sitemaps.views import sitemap
15+
from global_settings.views import throw_error, clear_entire_cache, sitemap
1816

1917
admin.site.site_header = 'OpenStax'
2018

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# Generated by Django 5.0.12 on 2025-02-26 04:42
2+
3+
import wagtail.fields
4+
from django.db import migrations
5+
6+
7+
class Migration(migrations.Migration):
8+
9+
dependencies = [
10+
("pages", "0153_alter_homepage_options"),
11+
]
12+
13+
operations = [
14+
migrations.AlterField(
15+
model_name="impact",
16+
name="improving_access",
17+
field=wagtail.fields.StreamField(
18+
[("content", 12)],
19+
block_lookup={
20+
0: ("wagtail.images.blocks.ImageChooserBlock", (), {"required": False}),
21+
1: ("wagtail.blocks.CharBlock", (), {"required": False}),
22+
2: ("wagtail.blocks.URLBlock", (), {"required": False}),
23+
3: ("pages.custom_blocks.ImageFormatChoiceBlock", (), {}),
24+
4: (
25+
"wagtail.blocks.CharBlock",
26+
(),
27+
{"help_text": "Used by the frontend for Google Analytics.", "required": False},
28+
),
29+
5: (
30+
"wagtail.blocks.StructBlock",
31+
[[("image", 0), ("alt_text", 1), ("link", 2), ("alignment", 3), ("identifier", 4)]],
32+
{},
33+
),
34+
6: ("wagtail.blocks.CharBlock", (), {}),
35+
7: ("wagtail.blocks.RichTextBlock", (), {}),
36+
8: ("wagtail.blocks.URLBlock", (), {}),
37+
9: ("pages.custom_blocks.APIImageChooserBlock", (), {"required": False}),
38+
10: (
39+
"wagtail.blocks.StructBlock",
40+
[[("icon", 9), ("description", 6), ("link_text", 1), ("link_href", 2)]],
41+
{},
42+
),
43+
11: ("wagtail.blocks.ListBlock", (10,), {}),
44+
12: (
45+
"wagtail.blocks.StructBlock",
46+
[
47+
[
48+
("image", 5),
49+
("heading", 6),
50+
("description", 7),
51+
("button_text", 6),
52+
("button_href", 8),
53+
("cards", 11),
54+
]
55+
],
56+
{},
57+
),
58+
},
59+
),
60+
),
61+
]

0 commit comments

Comments
 (0)