"""
Collection of common building blocks, helper auxiliary functions and
composable strategy classes for reuse.

Intended for simple missing-link procedures, not for reinventing
better-suited, state-of-the-art, fast libraries,
such as TA-Lib, Tulipy, PyAlgoTrade, NumPy, SciPy ...

Please raise ideas for additions to this collection on the
[issue tracker](https://github.com/kernc/backtesting.py).
"""
from collections import OrderedDict
from inspect import currentframe
from itertools import compress
from numbers import Number
from typing import Callable, Generator, Optional, Sequence, Union

import numpy as np
import pandas as pd

from ._plotting import plot_heatmaps as _plot_heatmaps
from ._stats import compute_stats as _compute_stats
from ._util import _Array, _as_str
from .backtesting import Strategy
# Per-name documentation overrides; filled in at the bottom of the module.
__pdoc__ = {}


OHLCV_AGG = OrderedDict((
    ('Open', 'first'),
    ('High', 'max'),
    ('Low', 'min'),
    ('Close', 'last'),
    ('Volume', 'sum'),
))
"""Dictionary of rules for aggregating resampled OHLCV data frames,
e.g.

    df.resample('4H', label='right').agg(OHLCV_AGG).dropna()
"""
TRADES_AGG = OrderedDict((
    ('Size', 'sum'),
    ('EntryBar', 'first'),
    ('ExitBar', 'last'),
    ('EntryPrice', 'mean'),
    ('ExitPrice', 'mean'),
    ('PnL', 'sum'),
    ('ReturnPct', 'mean'),
    ('EntryTime', 'first'),
    ('ExitTime', 'last'),
    ('Duration', 'sum'),
))
"""Dictionary of rules for aggregating resampled trades data,
e.g.

    stats['_trades'].resample('1D', on='ExitTime',
                              label='right').agg(TRADES_AGG)
"""

# Aggregation rules for the equity-curve columns.
# NOTE(review): the assignment line opening this mapping is missing from the
# reviewed text; the private name `_EQUITY_AGG` is assumed by analogy with the
# two constants above -- confirm before merging.
_EQUITY_AGG = {
    'Equity': 'last',
    'DrawdownPct': 'max',
    'DrawdownDuration': 'max',
}
def cross(series1: Sequence, series2: Sequence) -> bool:
    """
    Return `True` if `series1` and `series2` just crossed
    (above or below) each other.

        >>> cross(self.data.Close, self.sma)
        True

    """
    # A cross in either direction counts, so test both orderings.
    return crossover(series1, series2) or crossover(series2, series1)
def crossover(series1: Sequence, series2: Sequence) -> bool:
    """
    Return `True` if `series1` just crossed over (above)
    `series2`.

    Either argument may be a sequence, a `pd.Series`, or a plain number
    (which is treated as a constant level). If either input holds fewer
    than two values, the result is `False`.

        >>> crossover(self.data.Close, self.sma)
        True
    """
    # Normalize inputs: Series -> raw values (positional indexing),
    # scalar -> a two-element constant so that [-2] and [-1] both work.
    series1 = (
        series1.values if isinstance(series1, pd.Series) else
        (series1, series1) if isinstance(series1, Number) else
        series1)
    series2 = (
        series2.values if isinstance(series2, pd.Series) else
        (series2, series2) if isinstance(series2, Number) else
        series2)
    try:
        # bool() so callers get a plain `bool` as annotated, not `np.bool_`.
        return bool(series1[-2] < series2[-2] and series1[-1] > series2[-1])
    except IndexError:
        # Fewer than two values available: nothing could have crossed yet.
        return False
def plot_heatmaps(heatmap: pd.Series,
                  agg: Union[str, Callable] = 'max',
                  ncols: int = 3,
                  plot_width: int = 1200,
                  filename: str = '',
                  open_browser: bool = True):
    """
    Plots a grid of heatmaps, one for every pair of parameters in `heatmap`.

    `heatmap` is a Series as returned by
    `backtesting.backtesting.Backtest.optimize` when heatmap output
    is requested.

    When projecting the n-dimensional heatmap onto 2D, the values are
    aggregated by 'max' function by default. This can be tweaked
    with `agg` parameter, which accepts any argument pandas knows
    how to aggregate by.

    `ncols`, `plot_width`, `filename` and `open_browser` are passed
    through unchanged to the plotting backend.
    """
    # NOTE(review): the original sentence about `heatmap` named a specific
    # `Backtest.optimize` parameter, but that part is missing from the
    # reviewed text (presumably `return_heatmap=True`) -- confirm and restore.

    # todo:
    #   Lay heatmaps out lower-triangular instead of in a simple grid.
    #   Like [`skopt.plots.plot_objective()`][plot_objective] does.

    # Positional order (filename before plot_width) is kept exactly as in the
    # original call; presumably it mirrors `_plotting.plot_heatmaps` -- confirm.
    return _plot_heatmaps(heatmap, agg, ncols, filename, plot_width, open_browser)
def quantile(series: Sequence, quantile: Union[None, float] = None):
    """
    If `quantile` is `None`, return the quantile _rank_ of the last
    value of `series` wrt former series values.

    If `quantile` is a value between 0 and 1, return the _value_ of
    `series` at this quantile. If used to working with percentiles, just
    divide your percentile amount with 100 to obtain quantiles.

    An empty `series` yields `nan` in rank mode.

        >>> quantile(self.data.Close[-20:], .1)
        162.130
        >>> quantile(self.data.Close)
        0.13
    """
    if quantile is None:
        # Coerce so that plain Python sequences support the element-wise
        # comparison below (ndarray input is unaffected).
        values = np.asarray(series)
        try:
            last, former = values[-1], values[:-1]
        except IndexError:
            # Empty input: there is no "last" value to rank.
            return np.nan
        # Fraction of earlier values strictly below the last one.
        return np.mean(former < last)
    assert 0 <= quantile <= 1, "quantile must be within [0, 1]"
    return np.nanpercentile(series, quantile * 100)
def compute_stats(
        stats: pd.Series,
        data: pd.DataFrame,
        trades: Optional[pd.DataFrame] = None,
        risk_free_rate: float = 0.) -> pd.Series:
    """
    (Re-)compute strategy performance metrics.

    `stats` is the statistics series as returned by `backtesting.backtesting.Backtest.run()`.
    `data` is OHLC data as passed to the `backtesting.backtesting.Backtest`
    the `stats` were obtained in.
    `trades` can be a dataframe subset of `stats._trades` (e.g. only long trades).
    You can also tune `risk_free_rate`, used in calculation of Sharpe and Sortino ratios.

        >>> stats = Backtest(GOOG, MyStrategy).run()
        >>> only_long_trades = stats._trades[stats._trades.Size > 0]
        >>> long_stats = compute_stats(stats=stats, trades=only_long_trades,
        ...                            data=GOOG, risk_free_rate=.02)
    """
    equity = stats._equity_curve.Equity
    if trades is None:
        # No subset given: reuse the recorded trades and equity curve as-is.
        trades = stats._trades
    else:
        # A subset of trades was given: rebuild an equity curve that starts
        # at the initial equity and adds each trade's PnL from its entry bar on.
        # XXX: Is this buggy?
        equity = equity.copy()
        equity[:] = stats._equity_curve.Equity.iloc[0]
        for t in trades.itertuples(index=False):
            equity.iloc[t.EntryBar:] += t.PnL
    return _compute_stats(trades=trades, equity=equity, ohlc_data=data,
                          risk_free_rate=risk_free_rate, strategy_instance=stats._strategy)
def resample_apply(rule: str,
                   func: Optional[Callable[..., Sequence]],
                   series: Union[pd.Series, pd.DataFrame, _Array],
                   *args,
                   agg: Optional[Union[str, dict]] = None,
                   **kwargs):
    """
    Apply `func` (such as an indicator) to `series`, resampled to
    a time frame specified by `rule`. When called from inside
    `backtesting.backtesting.Strategy.init`,
    the result (returned) series will be automatically wrapped in
    `backtesting.backtesting.Strategy.I`
    wrapper method.

    `rule` is a valid
    [Pandas offset string](http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases)
    indicating a time frame to resample `series` to.

    `func` is the indicator function to apply on the resampled series.
    If `None`, the resampled series itself is returned unchanged.

    `series` is a data series (or array), such as any of the
    `backtesting.backtesting.Strategy.data` series. Due to pandas
    resampling limitations, this only works when input series
    has a datetime index.

    `agg` is the aggregation function to use on resampled groups of data.
    Valid values are anything accepted by `pandas/resample/.agg()`.
    Default value for dataframe input is `OHLCV_AGG` dictionary.
    Default value for series input is the appropriate entry from `OHLCV_AGG`
    if series has a matching name, or otherwise the value `"last"`,
    which is suitable for closing prices,
    but you might prefer another (e.g. `"max"` for peaks, or similar).

    Finally, any `*args` and `**kwargs` that are not already eaten by
    implicit `backtesting.backtesting.Strategy.I` call
    are passed to `func`.

    For example, if we have a typical moving average function
    `SMA(values, lookback_period)`, _hourly_ data source, and need to
    apply the moving average MA(10) on a _daily_ time frame,
    but don't want to plot the resulting indicator, we can do:

    .. code-block:: python

        class System(Strategy):
            def init(self):
                self.sma = resample_apply(
                    'D', SMA, self.data.Close, 10, plot=False)

    The above short snippet is roughly equivalent to:

    .. code-block:: python

        class System(Strategy):
            def init(self):
                # Strategy exposes `self.data` as raw NumPy arrays.
                # Let's convert closing prices back to pandas Series.
                close = self.data.Close.s

                # Resample to daily resolution. Aggregate groups
                # using their last value (i.e. closing price at the end
                # of the day). Notice `label='right'`. If it were set to
                # 'left' (default), the strategy would exhibit
                # look-ahead bias.
                daily = close.resample('D', label='right').agg('last')

                # We apply SMA(10) to daily close prices,
                # then reindex it back to original hourly index,
                # forward-filling the missing values in each day.
                # We make a separate function that returns the final
                # indicator array.
                def SMA(series, n):
                    from backtesting.test import SMA
                    return SMA(series, n).reindex(close.index).ffill()

                # The result equivalent to the short example above:
                self.sma = self.I(SMA, daily, 10, plot=False)

    """
    if func is None:
        # Identity "indicator": just hand back the resampled data.
        def func(x, *_, **__):
            return x

    if not isinstance(series, (pd.Series, pd.DataFrame)):
        assert isinstance(series, _Array), \
            'resample_apply() takes either a `pd.Series`, `pd.DataFrame`, ' \
            'or a `Strategy.data.*` array'
        # `_Array` exposes its pandas representation as `.s`.
        series = series.s

    if agg is None:
        agg = OHLCV_AGG.get(getattr(series, 'name', ''), 'last')
        if isinstance(series, pd.DataFrame):
            agg = {column: OHLCV_AGG.get(column, 'last')
                   for column in series.columns}

    resampled = series.resample(rule, label='right').agg(agg).dropna()
    resampled.name = _as_str(series) + '[' + rule + ']'

    # Check first few stack frames if we are being called from
    # inside Strategy.init, and if so, extract Strategy.I wrapper.
    frame, level = currentframe(), 0
    while frame and level <= 3:
        frame = frame.f_back
        level += 1
        if frame is None:
            # Ran out of callers (e.g. invoked from a top-level script);
            # the loop condition now ends the search and `else` applies.
            continue
        if isinstance(frame.f_locals.get('self'), Strategy):
            strategy_I = frame.f_locals['self'].I
            break
    else:
        # Not called from within a Strategy: apply the function directly.
        def strategy_I(func, *args, **kwargs):  # noqa: F811
            return func(*args, **kwargs)

    def wrap_func(resampled, *args, **kwargs):
        result = func(resampled, *args, **kwargs)
        if not isinstance(result, pd.DataFrame) and not isinstance(result, pd.Series):
            # Plain array-likes are lifted into pandas objects so that they
            # can be reindexed below.
            result = np.asarray(result)
            if result.ndim == 1:
                result = pd.Series(result, name=resampled.name)
            elif result.ndim == 2:
                result = pd.DataFrame(result.T)
        # Resample back to data index
        if not isinstance(result.index, pd.DatetimeIndex):
            result.index = resampled.index
        # Forward-fill over the union of both indexes first, then keep only
        # the original rows.
        # NOTE(review): the continuation of this call was missing from the
        # reviewed text; `method='ffill'` + final reindex follow the
        # "forward-filling" behavior described in the docstring -- confirm.
        result = result.reindex(index=series.index.union(resampled.index),
                                method='ffill').reindex(series.index)
        return result

    # Keep the indicator's own name visible to whatever wraps `wrap_func`.
    wrap_func.__name__ = func.__name__

    array = strategy_I(wrap_func, resampled, *args, **kwargs)
    return array
def random_ohlc_data(example_data: pd.DataFrame, *,
                     frac=1., random_state: Optional[int] = None
                     ) -> Generator[pd.DataFrame, None, None]:
    """
    OHLC data generator. The generated OHLC data has basic
    [descriptive statistics](https://en.wikipedia.org/wiki/Descriptive_statistics)
    similar to the provided `example_data`.

    `frac` is a fraction of data to sample (with replacement). Values greater
    than 1 result in oversampling.

    Such random data can be effectively used for stress testing trading
    strategy robustness, Monte Carlo simulations, significance testing, etc.

    Raises `ValueError` (on first iteration) if `example_data` lacks any of
    the columns 'Open', 'High', 'Low', 'Close'.

        >>> from backtesting.test import EURUSD
        >>> ohlc_generator = random_ohlc_data(EURUSD)
        >>> next(ohlc_generator)  # returns new random data
        ...
        >>> next(ohlc_generator)  # returns new random data
        ...
    """
    def shuffle(x):
        return x.sample(frac=frac, replace=frac > 1, random_state=random_state)

    required = ['Open', 'High', 'Low', 'Close']
    if len(example_data.columns.intersection(required)) != 4:
        raise ValueError("`data` must be a pandas.DataFrame with columns "
                         "'Open', 'High', 'Low', 'Close'")
    # NOTE(review): `random_state` is handed unchanged to every `sample()`
    # call, so with an integer seed each yielded frame looks to be identical;
    # confirm whether that is intended.
    while True:
        df = shuffle(example_data)
        # NOTE(review): assigning the full original index presumably only
        # works when the sample has as many rows as `example_data`
        # (i.e. `frac == 1`) -- confirm behavior for other `frac` values.
        df.index = example_data.index
        # Difference between each shuffled bar's close and the next bar's open.
        padding = df.Close - df.Open.shift(-1)
        # Open-vs-previous-close gaps taken from the example data, shuffled.
        gaps = shuffle(example_data.Open.shift(-1) - example_data.Close)
        # Cumulative offset added to every price column of the shuffled bars.
        deltas = (padding + gaps).shift(1).fillna(0).cumsum()
        for key in ('Open', 'High', 'Low', 'Close'):
            df[key] += deltas
        yield df
class SignalStrategy(Strategy):
    """
    A simple helper strategy that operates on position entry/exit signals.
    This makes the backtest of the strategy simulate a
    [vectorized backtest](https://www.google.com/search?q=vectorized+backtest).
    See [tutorials](index.html#tutorials) for usage examples.

    To use this helper strategy, subclass it, override its
    `backtesting.backtesting.Strategy.init` method,
    and set the signal vector by calling
    `backtesting.lib.SignalStrategy.set_signal` method from within it.

        class ExampleStrategy(SignalStrategy):
            def init(self):
                super().init()
                self.set_signal(sma1 > sma2, sma1 < sma2)

    Remember to call `super().init()` and `super().next()` in your
    overridden methods.
    """
    # Defaults used until `set_signal()` is called: no entry, no exit.
    __entry_signal = (0,)
    __exit_signal = (False,)

    def set_signal(self, entry_size: Sequence[float],
                   exit_portion: Optional[Sequence[float]] = None,
                   plot: bool = True):
        """
        Set entry/exit signal vectors (arrays).

        A long entry signal is considered present wherever `entry_size`
        is greater than zero, and a short signal wherever `entry_size`
        is less than zero, following `backtesting.backtesting.Order.size` semantics.

        If `exit_portion` is provided, a nonzero value closes portion the position
        (see `backtesting.backtesting.Trade.close()`) in the respective direction
        (positive values close long trades, negative short).

        If `plot` is `True`, the signal entry/exit indicators are plotted when
        `backtesting.backtesting.Backtest.plot` is called.
        """
        # Zeros become NaN so that "no signal" compares false against both
        # `> 0` and `< 0` in `next()`.
        self.__entry_signal = self.I(  # type: ignore
            lambda: pd.Series(entry_size, dtype=float).replace(0, np.nan),
            name='entry size', plot=plot, overlay=False, scatter=True, color='black')

        if exit_portion is not None:
            self.__exit_signal = self.I(  # type: ignore
                lambda: pd.Series(exit_portion, dtype=float).replace(0, np.nan),
                name='exit portion', plot=plot, overlay=False, scatter=True, color='black')

    def next(self):
        # NOTE(review): the statements inside the branches below (and this
        # `super().next()` call) were missing from the reviewed text. They are
        # reconstructed from the contract documented in `set_signal()` and the
        # class docstring -- confirm against `Strategy.buy/sell` and
        # `Trade.close` before merging.
        super().next()

        exit_portion = self.__exit_signal[-1]
        if exit_portion > 0:
            # Positive portion: close that share of every long trade.
            for trade in self.trades:
                if trade.is_long:
                    trade.close(exit_portion)
        elif exit_portion < 0:
            # Negative portion: close that share of every short trade.
            for trade in self.trades:
                if trade.is_short:
                    trade.close(-exit_portion)

        entry_size = self.__entry_signal[-1]
        if entry_size > 0:
            self.buy(size=entry_size)
        elif entry_size < 0:
            self.sell(size=-entry_size)
class TrailingStrategy(Strategy):
    """
    A strategy with automatic trailing stop-loss, trailing the current
    price at distance of some multiple of average true range (ATR). Call
    `TrailingStrategy.set_trailing_sl()` to set said multiple
    (`6` by default). See [tutorials](index.html#tutorials) for usage examples.

    Remember to call `super().init()` and `super().next()` in your
    overridden methods.
    """
    __n_atr = 6.
    __atr = None

    def init(self):
        # NOTE(review): this body was missing from the reviewed text. `next()`
        # indexes `self.__atr`, which stays `None` unless `set_atr_periods()`
        # has run, so it is called here with its default -- confirm.
        super().init()
        self.set_atr_periods()

    def set_atr_periods(self, periods: int = 100):
        """
        Set the lookback period for computing ATR. The default value
        of 100 ensures a _stable_ ATR.
        """
        hi, lo, c_prev = self.data.High, self.data.Low, pd.Series(self.data.Close).shift(1)
        # True range: the largest of high-low, |prev close-high|, |prev close-low|.
        tr = np.max([hi - lo, (c_prev - hi).abs(), (c_prev - lo).abs()], axis=0)
        # Rolling mean of the true range; back-fill the warm-up period.
        atr = pd.Series(tr).rolling(periods).mean().bfill().values
        self.__atr = atr

    def set_trailing_sl(self, n_atr: float = 6):
        """
        Sets the future trailing stop-loss as some multiple (`n_atr`)
        average true bar ranges away from the current price.
        """
        self.__n_atr = n_atr

    def next(self):
        super().next()
        # Can't use index=-1 because self.__atr is not an Indicator type
        index = len(self.data)-1
        for trade in self.trades:
            if trade.is_long:
                # Only ever raise a long trade's stop.
                trade.sl = max(trade.sl or -np.inf,
                               self.data.Close[index] - self.__atr[index] * self.__n_atr)
            else:
                # Only ever lower a short trade's stop.
                trade.sl = min(trade.sl or np.inf,
                               self.data.Close[index] + self.__atr[index] * self.__n_atr)
# Prevent pdoc3 documenting __init__ signature of Strategy subclasses
# (iterate over a snapshot, since the loop variable itself is a new global).
for cls in list(globals().values()):
    if isinstance(cls, type) and issubclass(cls, Strategy):
        __pdoc__[f'{cls.__name__}.__init__'] = False
# NOTE: Don't put anything below this __all__ list

# Public API: every callable defined in this module plus every UPPER_CASE
# constant, except names starting with an underscore. `getattr` guards
# against callables that carry no `__module__` attribute.
__all__ = [getattr(v, '__name__', k)
           for k, v in globals().items()                        # export
           if ((callable(v) and
                getattr(v, '__module__', None) == __name__ or   # callables from this module
                k.isupper()) and                                # or CONSTANTS
               not getattr(v, '__name__', k).startswith('_'))]  # neither marked internal

# NOTE: Don't put anything below here. See above.