Previous implementation can be viewed in #5767
The resulting query was causing havoc in the DB, spilling over to disk and taking _a long time_ (minutes).
Suggested approaches:
I've never used redis before, but it seems it's just key value store. Does just calculating the data once a day and storing it in some form (e.g. JSON blob) in an async job make sense? Also store the time generated so we can report that so people can see how stale the cached data is?
Then have the /stats endpoint pull the date last generated and stored data and return like we do today. Not running the SQL every time.
cc: @di @dstufft
Re-opening this, /stats seems to be broken, here's a traceback from https://sentry.io/organizations/python-software-foundation/issues/1080678729/:
TypeError: float() argument must be a string or a number, not 'NoneType'
File "raven/middleware.py", line 20, in common_exception_handling
yield
File "raven/middleware.py", line 100, in __call__
iterable = self.application(environ, start_response)
File "warehouse/utils/wsgi.py", line 99, in __call__
return self.app(environ, start_response)
File "warehouse/utils/wsgi.py", line 83, in __call__
return self.app(environ, start_response)
File "warehouse/utils/wsgi.py", line 71, in __call__
return self.app(environ, start_response)
File "whitenoise/base.py", line 75, in __call__
return self.application(environ, start_response)
File "pyramid/router.py", line 270, in __call__
response = self.execution_policy(environ, self)
File "pyramid_retry/__init__.py", line 114, in retry_policy
response = router.invoke_request(request)
File "pyramid/router.py", line 249, in invoke_request
response = handle_request(request)
File "warehouse/sanity.py", line 69, in sanity_tween_ingress
return handler(request)
File "warehouse/referrer_policy.py", line 16, in referrer_policy_tween
response = handler(request)
File "warehouse/csp.py", line 31, in content_security_policy_tween
resp = handler(request)
File "warehouse/config.py", line 82, in require_https_tween
return handler(request)
File "pyramid_tm/__init__.py", line 171, in tm_tween
reraise(*exc_info)
File "pyramid_tm/compat.py", line 36, in reraise
raise value
File "pyramid_tm/__init__.py", line 136, in tm_tween
response = handler(request)
File "warehouse/utils/compression.py", line 92, in compression_tween
response = handler(request)
File "warehouse/raven.py", line 40, in raven_tween
return handler(request)
File "pyramid/tweens.py", line 43, in excview_tween
response = _error_handler(request, exc)
File "pyramid/tweens.py", line 17, in _error_handler
reraise(*exc_info)
File "pyramid/compat.py", line 179, in reraise
raise value
File "pyramid/tweens.py", line 41, in excview_tween
response = handler(request)
File "warehouse/cache/http.py", line 74, in conditional_http_tween
response = handler(request)
File "warehouse/sanity.py", line 76, in sanity_tween_egress
return unicode_redirects(handler(request))
File "pyramid/router.py", line 148, in handle_request
registry, request, context, context_iface, view_name
File "pyramid/view.py", line 667, in _call_view
response = view_callable(context, request)
File "warehouse/sessions.py", line 323, in wrapped
return view(context, request)
File "warehouse/csrf.py", line 38, in wrapped
return view(context, request)
File "pyramid/viewderivers.py", line 514, in csrf_view
return view(context, request)
File "warehouse/cache/http.py", line 33, in wrapped
return view(context, request)
File "warehouse/cache/http.py", line 46, in wrapped
response = view(context, request)
File "warehouse/cache/origin/__init__.py", line 86, in wrapped
return view(context, request)
File "warehouse/cache/origin/derivers.py", line 31, in wrapper_view
return view(context, request)
File "warehouse/metrics/views.py", line 34, in wrapper_view
return view(context, request)
File "pyramid/viewderivers.py", line 460, in rendered_view
request, result, view_inst, context
File "pyramid/renderers.py", line 451, in render_view
return self.render_to_response(response, system, request=request)
File "pyramid/renderers.py", line 474, in render_to_response
result = self.render(value, system_values, request=request)
File "pyramid/renderers.py", line 470, in render
result = renderer(value, system_values)
File "pyramid_jinja2/__init__.py", line 265, in __call__
return template.render(system)
File "jinja2/asyncsupport.py", line 76, in render
return original_render(self, *args, **kwargs)
File "jinja2/environment.py", line 1008, in render
return self.environment.handle_exception(exc_info, True)
File "jinja2/environment.py", line 780, in handle_exception
reraise(exc_type, exc_value, tb)
File "jinja2/_compat.py", line 37, in reraise
raise value.with_traceback(tb)
File "/opt/warehouse/src/warehouse/templates/pages/stats.html", line 14, in top-level template code
{% extends "base.html" %}
File "/opt/warehouse/src/warehouse/templates/base.html", line 130, in top-level template code
{% block body %}
File "/opt/warehouse/src/warehouse/templates/base.html", line 244, in block "body"
{% block content %}
File "/opt/warehouse/src/warehouse/templates/pages/stats.html", line 47, in block "content"
<td>{{ pkg_data["size"]|filesizeformat() }}</td>
File "jinja2/filters.py", line 465, in do_filesizeformat
bytes = float(value)
(cc @DavidBord)
Per https://groups.google.com/forum/#!topic/pypa-dev/2-WcwcTylMY could we add a comment about this outage to https://status.python.org/ ?
@ewdurbin Can you triage here?
It's not super clear why it occurred but there were 11 projects that had a NULL for total_size.
Seems they all have the following in common:
So it must be something in the function/trigger.
CREATE OR REPLACE FUNCTION projects_total_size()
RETURNS TRIGGER AS $$
DECLARE
_release_id uuid;
_project_id uuid;
BEGIN
IF TG_OP = 'INSERT' THEN
_release_id := NEW.release_id;
ELSEIF TG_OP = 'UPDATE' THEN
_release_id := NEW.release_id;
ELSIF TG_OP = 'DELETE' THEN
_release_id := OLD.release_id;
END IF;
_project_id := (SELECT project_id
FROM releases
WHERE releases.id=_release_id);
UPDATE projects
SET total_size=t.project_total_size
FROM (
SELECT SUM(release_files.size) AS project_total_size
FROM release_files WHERE release_id IN
(SELECT id FROM releases WHERE releases.project_id = _project_id)
) AS t
WHERE id=_project_id;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;
Seems the issue is that the sub select
SELECT SUM(release_files.size) AS project_total_size
FROM release_files WHERE release_id IN
(SELECT id FROM releases WHERE releases.project_id = _project_id)
) AS t
Does indeed return null when the last release of a given project is deleted.
@DavidBord do you have any idea how to resolve this?
@ewdurbin, how about I'll add a default? something like
SELECT COALESCE(SUM(release_files.size), 0) AS project_total_size
FROM release_files WHERE release_id IN
(SELECT id FROM releases WHERE releases.project_id = _project_id)
) AS t
@DavidBord That looks reasonable! If that will resolve it, please send a PR!
We've missed the JSON rendering. I'll try get a PR up for it. Repro is to run CI there or manually run pypi_stats --debug.
Output of failure:
https://travis-ci.org/cooperlees/pypistats/jobs/571432785
I can't see from the warehouse code, as it decorates the same function @DavidBord fixed for the HTML. Any ideas why the JSON renderer is having issues?
Can someone give me some sanitized logs? I've made 5 or so requests in the last 10 mins.
Seems the sum function for total_package_size was returning a Decimal object that choked the JSON renderer.
I'll open a PR shortly.
@cooperlees this should be sorted now.
Legend - It's all fixed. Thanks!
CI agrees: https://travis-ci.org/cooperlees/pypistats/builds/571438981