Regression plots over time series are useful for getting a basic overview of how the data changes and what level it sits at. An example use case is presented in this repository.
Drawing regression plots such as regplot or lmplot with datetime values on the x axis is currently not supported; it results in the following error:
TypeError: unsupported operand type(s) for *: 'Timestamp' and 'float'
Full error log
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~/code/data-analysis/yt-history/exploration.py in
----> 35 sns.lmplot(data=grouped, x="date", y="title", hue="language", order=4,scatter=False)
~/code/data-analysis/yt-history/.env/lib/python3.8/site-packages/seaborn/_decorators.py in inner_f(*args, **kwargs)
44 )
45 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46 return f(**kwargs)
47 return inner_f
48
~/code/data-analysis/yt-history/.env/lib/python3.8/site-packages/seaborn/regression.py in lmplot(x, y, data, hue, col, row, palette, col_wrap, height, aspect, markers, sharex, sharey, hue_order, col_order, row_order, legend, legend_out, x_estimator, x_bins, x_ci, scatter, fit_reg, ci, n_boot, units, seed, order, logistic, lowess, robust, logx, x_partial, y_partial, truncate, x_jitter, y_jitter, scatter_kws, line_kws, size)
623 scatter_kws=scatter_kws, line_kws=line_kws,
624 )
--> 625 facets.map_dataframe(regplot, x=x, y=y, **regplot_kws)
626
627 # TODO this will need to change when we relax string requirement
~/code/data-analysis/yt-history/.env/lib/python3.8/site-packages/seaborn/axisgrid.py in map_dataframe(self, func, *args, **kwargs)
748
749 # Draw the plot
--> 750 self._facet_plot(func, ax, args, kwargs)
751
752 # Finalize the annotations and layout
~/code/data-analysis/yt-history/.env/lib/python3.8/site-packages/seaborn/axisgrid.py in _facet_plot(self, func, ax, plot_args, plot_kwargs)
772 plot_kwargs[key] = val
773 plot_args = []
--> 774 func(*plot_args, **plot_kwargs)
775
776 # Sort out the supporting information
~/code/data-analysis/yt-history/.env/lib/python3.8/site-packages/seaborn/_decorators.py in inner_f(*args, **kwargs)
44 )
45 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46 return f(**kwargs)
47 return inner_f
48
~/code/data-analysis/yt-history/.env/lib/python3.8/site-packages/seaborn/regression.py in regplot(x, y, data, x_estimator, x_bins, x_ci, scatter, fit_reg, ci, n_boot, units, seed, order, logistic, lowess, robust, logx, x_partial, y_partial, truncate, dropna, x_jitter, y_jitter, label, color, marker, scatter_kws, line_kws, ax)
833 scatter_kws["marker"] = marker
834 line_kws = {} if line_kws is None else copy.copy(line_kws)
--> 835 plotter.plot(ax, scatter_kws, line_kws)
836 return ax
837
~/code/data-analysis/yt-history/.env/lib/python3.8/site-packages/seaborn/regression.py in plot(self, ax, scatter_kws, line_kws)
368
369 if self.fit_reg:
--> 370 self.lineplot(ax, line_kws)
371
372 # Label the axes
~/code/data-analysis/yt-history/.env/lib/python3.8/site-packages/seaborn/regression.py in lineplot(self, ax, kws)
411 """Draw the model."""
412 # Fit the regression model
--> 413 grid, yhat, err_bands = self.fit_regression(ax)
414 edges = grid[0], grid[-1]
415
~/code/data-analysis/yt-history/.env/lib/python3.8/site-packages/seaborn/regression.py in fit_regression(self, ax, x_range, grid)
199 else:
200 x_min, x_max = ax.get_xlim()
--> 201 grid = np.linspace(x_min, x_max, 100)
202 ci = self.ci
203
<__array_function__ internals> in linspace(*args, **kwargs)
~/code/data-analysis/yt-history/.env/lib/python3.8/site-packages/numpy/core/function_base.py in linspace(start, stop, num, endpoint, retstep, dtype, axis)
118 # Convert float/complex array scalars to float, gh-3504
119 # and make sure one can use variables that have an __array_interface__, gh-6634
--> 120 start = asanyarray(start) * 1.0
121 stop = asanyarray(stop) * 1.0
122
TypeError: unsupported operand type(s) for *: 'Timestamp' and 'float'
Proposed fix: if the x values are Timestamp instances, convert them to floats before fitting the regression model.
This is on the roadmap — half of the work is done with the new core features that permit histogram/KDE over datetimes as seen in 0.11.0. But now the regression plotter needs to be refactored to use that machinery.
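In the meantime, it's relatively easy to achieve externally with something like the following (assuming numpy is imported as np, matplotlib.pyplot as plt, and seaborn as sns):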
x = np.random.choice(np.arange("2010-01-01", "2020-01-01", dtype="datetime64[D]"), 200)
y = np.random.normal(size=len(x))
f, ax = plt.subplots()
ax.xaxis.update_units(x)
sns.regplot(x=ax.xaxis.convert_units(x), y=y)
Most helpful comment
In the meantime, it's relatively easy to achieve externally, something like