diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index 5499132..ef7d5d0 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -1,4 +1,4 @@ -name: build-test +name: build & test on: push: diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000..aed7282 --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,28 @@ +name: Publish Python Package + +on: + release: + types: [published] + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Build package + run: python -m build + - name: Publish package + uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/README.md b/README.md new file mode 100644 index 0000000..340357c --- /dev/null +++ b/README.md @@ -0,0 +1,639 @@ +# Stock Statistics/Indicators Calculation Helper + +[![build & test](https://github.com/jealous/stockstats/actions/workflows/build-test.yml/badge.svg)](https://github.com/jealous/stockstats/actions/workflows/build-test.yml) +[![codecov](https://codecov.io/gh/jealous/stockstats/branch/master/graph/badge.svg?token=IFMD1pVJ7T)](https://codecov.io/gh/jealous/stockstats) +[![pypi](https://img.shields.io/pypi/v/stockstats.svg)](https://pypi.python.org/pypi/stockstats) + +VERSION: 0.4.0 + +## Introduction + +Supply a wrapper ``StockDataFrame`` for ``pandas.DataFrame`` with inline stock +statistics/indicators support. 
+ +Supported statistics/indicators are: + +* change (in percent) +* delta +* permutation (zero based) +* log return +* max in range +* min in range +* middle = (close + high + low) / 3 +* compare: le, ge, lt, gt, eq, ne +* count: both backward(c) and forward(fc) +* cross: including upward cross and downward cross +* SMA: Simple Moving Average +* EMA: Exponential Moving Average +* MSTD: Moving Standard Deviation +* MVAR: Moving Variance +* RSV: Raw Stochastic Value +* RSI: Relative Strength Index +* KDJ: Stochastic Oscillator +* Bolling: Bollinger Band +* MACD: Moving Average Convergence Divergence +* CR: Energy Index (Intermediate Willingness Index) +* WR: Williams Overbought/Oversold index +* CCI: Commodity Channel Index +* TR: True Range +* ATR: Average True Range +* DMA: Difference of Moving Average (10, 50) +* DMI: Directional Movement Index, including + * +DI: Positive Directional Indicator + * -DI: Negative Directional Indicator + * ADX: Average Directional Movement Index + * ADXR: Smoothed Moving Average of ADX +* TRIX: Triple Exponential Moving Average +* TEMA: Another Triple Exponential Moving Average +* VR: Volatility Volume Ratio +* MFI: Money Flow Index +* VWMA: Volume Weighted Moving Average +* CHOP: Choppiness Index +* KAMA: Kaufman's Adaptive Moving Average +* PPO: Percentage Price Oscillator +* StochRSI: Stochastic RSI +* WT: LazyBear's Wave Trend + +## Installation + +```pip install stockstats``` + +## Compatibility + +The build checks the compatibility for the last two major releases of python3 and +the last release of python2. + +## License + +[BSD-3-Clause License](./LICENSE.txt) + +## Tutorial + +### Initialization + +`StockDataFrame` works as a wrapper for the `pandas.DataFrame`. You need to +initialize the `StockDataFrame` with `wrap` or `retype`. + +``` python +import pandas as pd +from stockstats import StockDataFrame + +df = pd.read_csv('stock.csv') +stock = StockDataFrame.wrap(df) +``` + +Formalize your data. 
This package takes for granted that your data is sorted by +timestamp and contains certain columns. Please align your column name. + +* `date`: timestamp of the record, optional. +* `close`: the close price of the period +* `high`: the highest price of the interval +* `low`: the lowest price of the interval +* `volume`: the volume of stocks traded during the interval + +Note these column names are case-insensitive. They are converted to lower case +when you wrap the data frame. + +By default, the `date` column is used as the index. Users can also specify the +index column name in the `wrap` or `retype` function. + +Example: +`DataFrame` loaded from CSV. + +``` + Date Amount Close High Low Volume +0 20040817 90923240.0 11.20 12.21 11.03 7877900 +1 20040818 52955668.0 10.29 10.90 10.29 5043200 +2 20040819 32614676.0 10.53 10.65 10.30 3116800 +... ... ... ... ... ... ... +2810 20160815 56416636.0 39.58 39.79 38.38 1436706 +2811 20160816 68030472.0 39.66 40.86 39.00 1703600 +2812 20160817 62536480.0 40.45 40.59 39.12 1567600 +``` + +After conversion to `StockDataFrame` + +``` + amount close high low volume +date +20040817 90923240.0 11.20 12.21 11.03 7877900 +20040818 52955668.0 10.29 10.90 10.29 5043200 +20040819 32614676.0 10.53 10.65 10.30 3116800 +... ... ... ... ... ... +20160815 56416636.0 39.58 39.79 38.38 1436706 +20160816 68030472.0 39.66 40.86 39.00 1703600 +20160817 62536480.0 40.45 40.59 39.12 1567600 +``` + +### Access the Data + +`StockDataFrame` is a subclass of `pandas.DataFrame`. All the functions +of `pandas.DataFrame` should work the same as before. + +#### Retrieve the data with symbol + +We allow the user to access the statistics directly with some specified column +name, such as: `kdjk`, `macd`, `rsi`. + +Note that the value of these columns are calculated the first time you access +them from the data frame. You need to delete those columns first if you want the +lib to re-evaluate the value. 
+ +#### Retrieve the Series + +If you need the `Series`, you can use `macd = stock['macd']` +or `rsi = stock.get('rsi')`. + +#### Retrieve the symbol with 2 arguments + +For some statistics, we allow the user to supply the column name and the window, +such as: delta, shift, simple moving average, etc. You can use the following +pattern to calculate them: `<columnName>_<windowSize>_<statistics>` + +Here are some examples for the pattern: + +* 5 periods simple moving average of the high price: `high_5_sma` +* 10 periods exponential moving average of the close: `close_10_ema` +* 1 period delta of the high price: `high_-1_d` + The `-` symbol stands for looking backwards. + +#### Retrieve the symbol with 1 argument + +Some statistics allow the user to specify the window but not the column. Use the +following pattern to specify your window: `<statistics>_<windowSize>` + +For example: + +* 6 periods RSI: `rsi_6` +* 10 periods CCI: `cci_10` +* 13 periods ATR: `atr_13` + +Normally, these statistics have default windows. +Check their documentation for details. + +### Statistics/Indicators + +Some statistics have configurable parameters. They are class level fields. Changes +to these fields are global, and they won't affect the existing results. Remove the +existing results to trigger the re-calculation of these columns. + +#### Change of the Close + +`df['change']` is the change of the `close` price in percentage. + +#### Delta of Periods + +Use the pattern `<column>_<window>_d` to retrieve the delta between different periods. + +You can also use `<column>_delta` as a shortcut to `<column>_-1_d` + + +For example: +* `df['close_-1_d']` retrieves the close price delta between current and prev. period. +* `df['close_delta']` is the same as `df['close_-1_d']` +* `df['high_2_d']` retrieves the high price delta between current and 2 days later + +#### Shift Periods + +Shift the column backward or forward. It takes 2 parameters: + +* the name of the column to shift +* periods to shift, can be negative + +We fill the head and tail with the nearest data. 
+ +See the example below: + +``` python +In [15]: df[['close', 'close_-1_s', 'close_2_s']] +Out[15]: + close close_-1_s close_2_s +date +20040817 11.20 11.20 10.53 +20040818 10.29 11.20 10.55 +20040819 10.53 10.29 10.10 +20040820 10.55 10.53 10.25 +... ... ... ... +20160812 39.10 38.70 39.66 +20160815 39.58 39.10 40.45 +20160816 39.66 39.58 40.45 +20160817 40.45 39.66 40.45 + +[2813 rows x 3 columns] +``` + +#### RSI - Relative Strength Index + +RSI stands +for [Relative Strength Index](https://en.wikipedia.org/wiki/Relative_strength_index) + +It has a configurable window. The default window size is 14 which is +configurable through `StockDataFrame.RSI`. e.g. + +* `df['rsi']`: 14 periods RSI +* `df['rsi_6']`: 6 periods RSI + +#### Log Return of the Close + +Logarithmic return = ln( close / last close) + +From [wiki](https://en.wikipedia.org/wiki/Rate_of_return): + +> For example, if a stock is priced at 3.570 USD per share at the close on +> one day, and at 3.575 USD per share at the close the next day, then the +> logarithmic return is: ln(3.575/3.570) = 0.0014, or 0.14%. + +Use `df['log-ret']` to access this column. + +#### Count of Non-Zero Value + +Count non-zero value of a specific range. It requires a column and a window. + +Examples: + +* count how many typical price are larger than close in the past 10 periods + +``` python +In [22]: tp = df['middle'] + +In [23]: df['res'] = df['middle'] > df['close'] + +In [24]: df[['middle', 'close', 'res', 'res_-10_c']] +Out[24]: + middle close res res_10_c +date +20040817 11.480000 11.20 True 1.0 +20040818 10.493333 10.29 True 2.0 +20040819 10.493333 10.53 False 2.0 +20040820 10.486667 10.55 False 2.0 +20040823 10.163333 10.10 True 3.0 +... ... ... ... ... 
+20160811 38.703333 38.70 True 5.0 +20160812 38.916667 39.10 False 5.0 +20160815 39.250000 39.58 False 4.0 +20160816 39.840000 39.66 True 5.0 +20160817 40.053333 40.45 False 5.0 + +[2813 rows x 4 columns] +``` + +* Count ups in the past 10 periods + +``` python +In [26]: df['ups'], df['downs'] = df['change'] > 0, df['change'] < 0 + +In [27]: df[['ups', 'ups_10_c', 'downs', 'downs_10_c']] +Out[27]: + ups ups_10_c downs downs_10_c +date +20040817 False 0.0 False 0.0 +20040818 False 0.0 True 1.0 +20040819 True 1.0 False 1.0 +20040820 True 2.0 False 1.0 +20040823 False 2.0 True 2.0 +... ... ... ... ... +20160811 False 3.0 True 7.0 +20160812 True 3.0 False 7.0 +20160815 True 4.0 False 6.0 +20160816 True 5.0 False 5.0 +20160817 True 5.0 False 5.0 + +[2813 rows x 4 columns] +``` + +#### Max and Min of the Periods + +Retrieve the max/min value of specified periods. They require column and +window. +Note the window does NOT simply stand for the rolling window. + +Examples: + +* `close_-3,2_max` stands for the max of 2 periods later and 3 periods ago +* `close_-2~0_min` stands for the min of 2 periods ago till now + +#### RSV - Raw Stochastic Value + +RSV is essential for calculating KDJ. It takes a window parameter. +Use `df['rsv']` or `df['rsv_6']` to access it. + +#### RSI - Relative Strength Index + +[RSI](https://en.wikipedia.org/wiki/Relative_strength_index) chart the current +and historical strength or weakness of a stock. It takes a window parameter. + +The default window is 14. Use `StockDataFrame.RSI` to tune it. + +Examples: + +* `df['rsi']`: retrieve the RSI of 14 periods +* `df['rsi_6']`: retrieve the RSI of 6 periods + +#### Stochastic RSI + +[Stochastic RSI](https://www.investopedia.com/terms/s/stochrsi.asp) gives +traders an idea of whether the current RSI value is overbought or oversold. It +takes a window parameter. + +The default window is 14. Use `StockDataFrame.RSI` to tune it. 
+ +Examples: + +* `df['stochrsi']`: retrieve the Stochastic RSI of 14 periods +* `df['stochrsi_6']`: retrieve the Stochastic RSI of 6 periods + +#### WT - Wave Trend + +Retrieve +the [LazyBear's Wave Trend](https://medium.com/@samuel.mcculloch/lets-take-a-look-at-wavetrend-with-crosses-lazybear-s-indicator-2ece1737f72f) +with `df['wt1']` and `df['wt2']`. + +Wave trend uses two parameters. You can tune them with +`StockDataFrame.WAVE_TREND_1` and `StockDataFrame.WAVE_TREND_2`. + +#### SMMA - Smoothed Moving Average + +It takes two parameters, column and window. + +For example, use `df['close_7_smma']` to retrieve the 7 periods smoothed moving +average of the close price. + +#### TRIX - Triple Exponential Average + +[Trix, the triple exponential average](https://www.investopedia.com/articles/technical/02/092402.asp) +, is used to identify oversold and overbought markets. + +The algorithm is: + +``` +TRIX = (TripleEMA - LastTripleEMA) * 100 / LastTripleEMA +TripleEMA = EMA of EMA of EMA +LastTripleEMA = TripleEMA of the last period +``` + +It takes two parameters, column and window. By default, the column is `close`, +the window is 12. + +Use `StockDataFrame.TRIX_EMA_WINDOW` to change the default window. + +Examples: + +* `df['trix']` stands for 12 periods Trix for the close price. +* `df['middle_10_trix']` stands for the 10 periods Trix for the typical price. + +#### TEMA - Another Triple Exponential Average + +Tema is another implementation for the triple exponential moving average. You +can find the +algorithm [here](https://www.forextraders.com/forex-education/forex-technical-analysis/triple-exponential-moving-average-the-tema-indicator/) +. + +``` +TEMA=(3 x EMA) - (3 x EMA of EMA) + (EMA of EMA of EMA) +``` + +It takes two parameters, column and window. By default, the column is `close`, +the window is 5. + +Use `StockDataFrame.TEMA_EMA_WINDOW` to change the default window. + +Examples: + +* `df['tema']` stands for 5 periods TEMA for the close price. 
+* `df['middle_10_tema']` stands for the 10 periods TEMA for the typical price. + +#### WR - Williams Overbought/Oversold Index + +[Williams Overbought/Oversold index](https://www.investopedia.com/terms/w/williamsr.asp) +is a type of momentum indicator that moves between 0 and -100 and measures +overbought and oversold levels. + +It takes a window parameter. The default window is 14. Use `StockDataFrame.WR` +to change the default window. + +Examples: + +* `df['wr']` retrieves the 14 periods WR. +* `df['wr_6']` retrieves the 6 periods WR. + +#### CCI - Commodity Channel Index + +CCI stands +for [Commodity Channel Index](https://www.investopedia.com/terms/c/commoditychannelindex.asp) +. + +It requires a window parameter. The default window is 14. Use +`StockDataFrame.CCI` to change it. + +Examples: + +* `df['cci']` retrieves the default 14 periods CCI. +* `df['cci_6']` retrieves the 6 periods CCI. + +#### TR - True Range of Trading + +TR is a measure of volatility of a High-Low-Close series. It is used for +calculating the ATR. + +#### ATR - Average True Range + +The [Average True Range](https://en.wikipedia.org/wiki/Average_true_range) is an +N-period smoothed moving average (SMMA) of the true range value. +It defaults to 14 periods. + +Users can modify the default window with `StockDataFrame.ATR_SMMA`. + +Example: + +* `df['atr']` retrieves the 14 periods ATR. +* `df['atr_5']` retrieves the 5 periods ATR. + +#### DMA - Difference of Moving Average + +`df['dma']` retrieves the difference of 10 periods SMA of the close price and +the 50 periods SMA of the close price. + +#### DMI - Directional Movement Index + +The [directional movement index (DMI)](https://www.investopedia.com/terms/d/dmi.asp) +identifies in which direction the price of an asset is moving. 
+ +It has several lines: + +* `df['pdi']` is the positive directional movement line (+DI) +* `df['mdi']` is the negative directional movement line (-DI) +* `df['dx']` is the directional index (DX) +* `df['adx']` is the average directional index (ADX) +* `df['adxr']` is an EMA for ADX + +It has several parameters. + +* `StockDataFrame.PDI_SMMA` - window for +DI +* `StockDataFrame.MDI_SMMA` - window for -DI +* `StockDataFrame.DX_SMMA` - window for DX +* `StockDataFrame.ADX_EMA` - window for ADX +* `StockDataFrame.ADXR_EMA` - window for ADXR + +#### KDJ Indicator + +It consists of three lines: +* `df['kdjk']` - K series +* `df['kdjd']` - D series +* `df['kdjj']` - J series + +The default window is 9. Use `StockDataFrame.KDJ_WINDOW` to change it. +Use `df['kdjk_6']` to retrieve the K series of 6 periods. + +KDJ also has two configurable parameters, set together as `StockDataFrame.KDJ_PARAM`. +The default value is `(2.0/3.0, 1.0/3.0)` + +#### CR - Energy Index + +The [Energy Index (Intermediate Willingness Index)](https://support.futunn.com/en/topic167/?lang=en-us) +uses the relationship between the highest price, the lowest price and +yesterday's middle price to reflect the market's willingness to buy +and sell. + +It contains 4 lines: +* `df['cr']` - the CR line +* `df['cr-ma1']` - `StockDataFrame.CR_MA1` periods of the CR moving average +* `df['cr-ma2']` - `StockDataFrame.CR_MA2` periods of the CR moving average +* `df['cr-ma3']` - `StockDataFrame.CR_MA3` periods of the CR moving average + +#### Typical Price + +It's the average of `high`, `low` and `close`. +Use `df['middle']` to access this value. + +#### Bollinger Bands + +The Bollinger bands include three lines +* `df['boll']` is the baseline +* `df['boll_ub']` is the upper band +* `df['boll_lb']` is the lower band + +The default period of the Bollinger Band can be changed with +`StockDataFrame.BOLL_PERIOD`. The width of the bands can be tuned with +`StockDataFrame.BOLL_STD_TIMES`. The default value is 2. 
+ +#### MACD - Moving Average Convergence Divergence + +We use the close price to calculate the MACD lines. +* `df['macd']` is the difference between two exponential moving averages. +* `df['macds']` is the signal line. +* `df['macdh']` is the histogram line. + +The period of short and long EMA can be tuned with +`StockDataFrame.MACD_EMA_SHORT` and `StockDataFrame.MACD_EMA_LONG`. The default +values are 12 and 26. + +The period of the signal line can be tuned with +`StockDataFrame.MACD_EMA_SIGNAL`. The default value is 9. + +#### PPO - Percentage Price Oscillator + +The [Percentage Price Oscillator](https://stockcharts.com/school/doku.php?id=chart_school:technical_indicators:price_oscillators_ppo) +includes three lines. + +* `df['ppo']` derives from the difference of 2 exponential moving averages. +* `df['ppos']` is the signal line. +* `df['ppoh']` is the histogram line. + +The period of short and long EMA can be tuned with +`StockDataFrame.PPO_EMA_SHORT` and `StockDataFrame.PPO_EMA_LONG`. The default +values are 12 and 26. + +The period of the signal line can be tuned with +`StockDataFrame.PPO_EMA_SIGNAL`. The default value is 9. + +#### Moving Standard Deviation + +Follow the pattern `<column>_<window>_mstd` to retrieve the moving STD. + +#### Moving Variance + +Follow the pattern `<column>_<window>_mvar` to retrieve the moving VAR. + +#### Volume Weighted Moving Average + +It's the [moving average weighted by volume](https://www.investopedia.com/articles/trading/11/trading-with-vwap-mvwap.asp). + +It has a parameter for window size. The default window is 14. Change it with +`StockDataFrame.VWMA`. + +Examples: +* `df['vwma']` retrieves the 14 periods VWMA +* `df['vwma_6']` retrieves the 6 periods VWMA + +#### CHOP - Choppiness Index + +The [Choppiness Index](https://www.tradingview.com/education/choppinessindex/) +determines if the market is choppy. + +It has a parameter for window size. The default window is 14. Change it with +`StockDataFrame.CHOP`. 
+ +Examples: +* `df['chop']` retrieves the 14 periods CHOP +* `df['chop_6']` retrieves the 6 periods CHOP + +#### MFI - Money Flow Index + +The [Money Flow Index](https://www.investopedia.com/terms/m/mfi.asp) +identifies overbought or oversold signals in an asset. + +It has a parameter for window size. The default window is 14. Change it with +`StockDataFrame.MFI`. + +Examples: +* `df['mfi']` retrieves the 14 periods MFI +* `df['mfi_6']` retrieves the 6 periods MFI + +#### KAMA - Kaufman's Adaptive Moving Average + +[Kaufman's Adaptive Moving Average](https://school.stockcharts.com/doku.php?id=technical_indicators:kaufman_s_adaptive_moving_average) +is designed to account for market noise or volatility. + +It has 2 optional parameters and 2 required parameters +* fast - optional, the parameter for fast EMA smoothing, default to 5 +* slow - optional, the parameter for slow EMA smoothing, default to 34 +* column - required, the column to calculate +* window - required, rolling window size + +The default values for fast and slow can be configured with +`StockDataFrame.KAMA_FAST` and `StockDataFrame.KAMA_SLOW` + +Examples: +* `df['close_10_kama_2_30']` retrieves 10 periods KAMA of the close price with + `fast = 2` and `slow = 30` +* `df['close_2_kama']` retrieves 2 periods KAMA of the close price + +#### Cross Upwards and Cross Downwards + +Use the pattern `<A>_xu_<B>` to check when A crosses up B. + +Use the pattern `<A>_xd_<B>` to check when A crosses down B. + +Use the pattern `<A>_x_<B>` to check when A crosses B. + +Examples: +* `kdjk_x_kdjd` returns a series that marks the cross of KDJK and KDJD +* `kdjk_xu_kdjd` returns a series that marks where KDJK crosses up KDJD +* `kdjk_xd_kdjd` returns a series that marks where KDJK crosses down KDJD + +## Issues + +We use [GitHub Issues](https://github.com/jealous/stockstats/issues) to track +the issues or bugs. 
+ +## Others + +MACDH Note: + +In July 2017 the code for MACDH was changed to drop an extra 2x multiplier on +the final value to align better with calculation methods used in tools like +cryptowatch, tradingview, etc. + +## Contact author: + +- Cedric Zhuang diff --git a/README.rst b/README.rst deleted file mode 100644 index a4aa416..0000000 --- a/README.rst +++ /dev/null @@ -1,275 +0,0 @@ -Stock Statistics/Indicators Calculation Helper -============================================== - -.. image:: https://travis-ci.org/jealous/stockstats.svg - :target: https://travis-ci.org/jealous/stockstats - -.. image:: https://coveralls.io/repos/jealous/stockstats/badge.svg - :target: https://coveralls.io/github/jealous/stockstats - -.. image:: https://img.shields.io/pypi/v/stockstats.svg - :target: https://pypi.python.org/pypi/stockstats - - -VERSION: 0.3.2 - -Introduction ------------- - -Supply a wrapper ``StockDataFrame`` based on the ``pandas.DataFrame`` with -inline stock statistics/indicators support. - -Supported statistics/indicators are: - -- change (in percent) -- delta -- permutation (zero based) -- log return -- max in range -- min in range -- middle = (close + high + low) / 3 -- compare: le, ge, lt, gt, eq, ne -- count: both backward(c) and forward(fc) -- SMA: simple moving average -- EMA: exponential moving average -- MSTD: moving standard deviation -- MVAR: moving variance -- RSV: raw stochastic value -- RSI: relative strength index -- KDJ: Stochastic oscillator -- Bolling: including upper band and lower band. -- MACD: moving average convergence divergence. Including signal and histogram. (see note) -- CR: -- WR: Williams Overbought/Oversold index -- CCI: Commodity Channel Index -- TR: true range -- ATR: average true range -- line cross check, cross up or cross down. 
-- DMA: Different of Moving Average (10, 50) -- DMI: Directional Moving Index, including - - - +DI: Positive Directional Indicator - - -DI: Negative Directional Indicator - - ADX: Average Directional Movement Index - - ADXR: Smoothed Moving Average of ADX - -- TRIX: Triple Exponential Moving Average -- TEMA: Another Triple Exponential Moving Average -- VR: Volatility Volume Ratio -- MFI: Money Flow Index - -Installation ------------- - -``pip install stockstats`` - -Compatibility -------------- - -Please check the `setup.py`_ file. - -Note that pandas add some type check after version 1.0. -One type assert is skipped in ``StockDataFrame``. Check ISSUE-50 for detail. - -License -------- - -`BSD`_ - -Tutorial --------- - -- Initialize the ``StockDataFrame`` with the ``retype`` function which - convert a ``pandas.DataFrame`` to a ``StockDataFrame``. - -.. code-block:: python - - stock = StockDataFrame.retype(pd.read_csv('stock.csv')) - - -- Formalize your data. This package takes for granted that your data is sorted - by timestamp and contains certain columns. Please align your column name. - - + ``open``: the open price of the interval - - + ``close``: the close price of the interval - - + ``high``: the highest price of the interval - - + ``low``: the lowest price of the interval - - + ``volume``: the volume of stocks traded during the interval - - + ``amount``: the amount of the stocks during the interval - -- There are some shortcuts for frequent used statistics/indicators like - ``kdjk``, ``boll_hb``, ``macd``, etc. - -- The indicators/statistics are generated on the fly when they are accessed. - If you are accessing through ``Series``, it may return not found error. - The fix is to explicitly initialize it by accessing it like below: - -.. code-block:: python - - _ = stock['macd'] - # or - stock.get('macd') - -- Using get item to access the indicators. The item name following the - pattern: ``{columnName_window_statistics}``. 
- Some statistics/indicators has their short cut. See examples below: - -.. code-block:: python - - # volume delta against previous day - stock['volume_delta'] - - # open delta against next 2 day - stock['open_2_d'] - - # open price change (in percent) between today and the day before yesterday - # 'r' stands for rate. - stock['open_-2_r'] - - # CR indicator, including 5, 10, 20 days moving average - stock['cr'] - stock['cr-ma1'] - stock['cr-ma2'] - stock['cr-ma3'] - - # volume max of three days ago, yesterday and two days later - stock['volume_-3,2,-1_max'] - - # volume min between 3 days ago and tomorrow - stock['volume_-3~1_min'] - - # KDJ, default to 9 days - stock['kdjk'] - stock['kdjd'] - stock['kdjj'] - - # three days KDJK cross up 3 days KDJD - stock['kdj_3_xu_kdjd_3'] - - # 2 days simple moving average on open price - stock['open_2_sma'] - - # MACD - stock['macd'] - # MACD signal line - stock['macds'] - # MACD histogram - stock['macdh'] - - # bolling, including upper band and lower band - stock['boll'] - stock['boll_ub'] - stock['boll_lb'] - - # close price less than 10.0 in 5 days count - stock['close_10.0_le_5_c'] - - # CR MA2 cross up CR MA1 in 20 days count - stock['cr-ma2_xu_cr-ma1_20_c'] - - # count forward(future) where close price is larger than 10 - stock['close_10.0_ge_5_fc'] - - # 6 days RSI - stock['rsi_6'] - # 12 days RSI - stock['rsi_12'] - - # 10 days WR - stock['wr_10'] - # 6 days WR - stock['wr_6'] - - # CCI, default to 14 days - stock['cci'] - # 20 days CCI - stock['cci_20'] - - # TR (true range) - stock['tr'] - # ATR (Average True Range) - stock['atr'] - - # DMA, difference of 10 and 50 moving average - stock['dma'] - - # DMI - # +DI, default to 14 days - stock['pdi'] - # -DI, default to 14 days - stock['mdi'] - # DX, default to 14 days of +DI and -DI - stock['dx'] - # ADX, 6 days SMA of DX, same as stock['dx_6_ema'] - stock['adx'] - # ADXR, 6 days SMA of ADX, same as stock['adx_6_ema'] - stock['adxr'] - - # TRIX, default to 12 days - 
stock['trix'] - # TRIX based on the close price for a window of 3 - stock['close_3_trix'] - # MATRIX is the simple moving average of TRIX - stock['trix_9_sma'] - # TEMA, another implementation for triple ema - stock['tema'] - # TEMA based on the close price for a window of 2 - stock['close_2_tema'] - - # VR, default to 26 days - stock['vr'] - # MAVR is the simple moving average of VR - stock['vr_6_sma'] - - # Money flow index, default to 14 days - stock['mfi'] - -- Following options are available for tuning. Note that all of them are class level options and MUST be changed before any calculation happens. - - KDJ - - KDJ_WINDOW: default to 9 - - BOLL - - BOLL_WINDOW: default to 20 - - BOLL_STD_TIMES: default to 2 - - MACD - - MACD_EMA_SHORT: default to 12 - - MACD_EMA_LONG: default to 26 - - MACD_EMA_SIGNAL: default to 9 - - PDI, MDI, DX & ADX - - PDI_SMMA: default to 14 - - MDI_SMMA: default to 14 - - DX_SMMA: default to 14 - - ADX_EMA: default to 6 - - ADXR_EMA: default to 6 - - CR - - CR_MA1: default to 5 - - CR_MA2: default to 10 - - CR_MA3: default to 20 - - Triple EMA - - TRIX_EMA_WINDOW: default to 12 - - TEMA_EMA_WINDOW: default to 5 - - ATR - - ATR_SMMA: default to 14 - - MFI - - MFI: default to 14 - - -To file issue, please visit: - -https://github.com/jealous/stockstats - - -MACDH Note: - -In July 2017 the code for MACDH was changed to drop an extra 2x multiplier on the final value to align better with calculation methods used in tools like cryptowatch, tradingview, etc. - -Contact author: - -- Cedric Zhuang - -.. _BSD: LICENSE.txt -.. 
_setup.py: setup.py diff --git a/setup.py b/setup.py index 2210bde..936c8f7 100644 --- a/setup.py +++ b/setup.py @@ -61,7 +61,7 @@ def read_requirements(filename): def get_long_description(): - filename = 'README.rst' + filename = 'README.md' return read(filename) @@ -80,10 +80,8 @@ def get_long_description(): classifiers=[ "Programming Language :: Python", "Programming Language :: Python :: 2.7", - "Programming Language :: Python :: 3.5", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", "Programming Language :: Python :: Implementation :: CPython", "Natural Language :: English", "Intended Audience :: Developers", @@ -91,8 +89,9 @@ def get_long_description(): "Operating System :: OS Independent", "Development Status :: 4 - Beta", "Topic :: Utilities", - "License :: OSI Approved :: Apache Software License", + "License :: OSI Approved :: BSD License", ], install_requires=read_requirements('requirements.txt'), - tests_require=read_requirements('test-requirements.txt') + tests_require=read_requirements('test-requirements.txt'), + long_description_content_type='text/markdown', ) diff --git a/stockstats.py b/stockstats.py index a4101b0..13a6bc1 100644 --- a/stockstats.py +++ b/stockstats.py @@ -27,8 +27,6 @@ from __future__ import unicode_literals import itertools -import logging -import operator import re import numpy as np @@ -36,16 +34,18 @@ __author__ = 'Cedric Zhuang' -log = logging.getLogger(__name__) - def wrap(df, index_column=None): + """ wraps a pandas DataFrame to StockDataFrame + + :param df: pandas DataFrame + :param index_column: the name of the index column, default to ``date`` + :return: an object of StockDataFrame + """ return StockDataFrame.retype(df, index_column) class StockDataFrame(pd.DataFrame): - OPERATORS = ['le', 'ge', 'lt', 'gt', 'eq', 'ne'] - # Start of options. 
KDJ_PARAM = (2.0 / 3.0, 1.0 / 3.0) KDJ_WINDOW = 9 @@ -89,6 +89,8 @@ class StockDataFrame(pd.DataFrame): VR = 26 + WR = 14 + WAVE_TREND_1 = 10 WAVE_TREND_2 = 21 @@ -112,8 +114,8 @@ def _get_change(cls, df): """ df['change'] = cls._change(df['close'], -1) - @staticmethod - def _get_p(df, column, shifts): + @classmethod + def _get_p(cls, df, column, shifts): """ get the permutation of specified range example: @@ -132,7 +134,7 @@ def _get_p(df, column, shifts): column_name = '{}_{}_p'.format(column, shifts) # initialize the column if not df.get(column) - shifts = StockDataFrame.to_ints(shifts)[::-1] + shifts = cls.to_ints(shifts)[::-1] indices = None count = 0 for shift in shifts: @@ -145,13 +147,12 @@ def _get_p(df, column, shifts): count += 1 if indices is not None: cp = indices.copy() - StockDataFrame.set_nan(cp, shifts) + cls.set_nan(cp, shifts) df[column_name] = cp @classmethod def to_ints(cls, shifts): - items = map(cls._process_shifts_segment, - shifts.split(',')) + items = map(cls._process_shifts_segment, shifts.split(',')) return sorted(list(set(itertools.chain(*items)))) @classmethod @@ -161,18 +162,6 @@ def to_int(cls, shifts): raise IndexError("only accept 1 number.") return numbers[0] - @staticmethod - def to_floats(shifts): - floats = map(float, shifts.split(',')) - return sorted(list(set(floats))) - - @classmethod - def to_float(cls, shifts): - floats = cls.to_floats(shifts) - if len(floats) != 1: - raise IndexError('only accept 1 float.') - return floats[0] - @staticmethod def _process_shifts_segment(shift_segment): if '~' in shift_segment: @@ -258,7 +247,7 @@ def _get_log_ret(cls, df): def _get_c(cls, df, column, shifts): """ get the count of column in range (shifts) - example: kdjj_0_le_20_c + example: change_20_c :param df: stock data :param column: column name :param shifts: range to count, only to previous @@ -276,7 +265,7 @@ def _get_c(cls, df, column, shifts): def _get_fc(cls, df, column, shifts): """ get the count of column in range of future 
(shifts) - example: kdjj_0_le_20_fc + example: change_20_fc :param df: stock data :param column: column name :param shifts: range to count, only to future @@ -293,13 +282,6 @@ def _get_fc(cls, df, column, shifts): df[column_name] = counts return counts - @classmethod - def _get_op(cls, df, column, threshold, op): - column_name = '{}_{}_{}'.format(column, threshold, op) - threshold = cls.to_float(threshold) - f = getattr(operator, op) - df[column_name] = f(df[column], threshold) - @classmethod def _init_shifted_columns(cls, column, df, shifts): # initialize the column if not @@ -340,7 +322,6 @@ def _get_rsv(cls, df, window): cv = (df['close'] - low_min) / (high_max - low_min) df[column_name] = cv.fillna(0.0) * 100 - # noinspection PyUnresolvedReferences @classmethod def _get_rsi(cls, df, window=None): """ Calculate the RSI (Relative Strength Index) within N periods @@ -409,7 +390,7 @@ def _get_wave_trend(cls, df): n1 = cls.WAVE_TREND_1 n2 = cls.WAVE_TREND_2 - tp = cls._middle(df) + tp = cls._tp(df) esa = cls._ema(tp, n1) d = cls._ema((tp - esa).abs(), n1) ci = (tp - esa) / (0.015 * d) @@ -430,6 +411,7 @@ def _get_smma(cls, df, column, windows): """ get smoothed moving average. 
:param df: data + :param column: the column to calculate :param windows: range :return: result series """ @@ -439,6 +421,14 @@ def _get_smma(cls, df, column, windows): @classmethod def _get_trix(cls, df, column=None, windows=None): + """ Triple Exponential Average + + https://www.investopedia.com/articles/technical/02/092402.asp + :param df: data + :param column: the column to calculate + :param windows: range + :return: result series + """ column_name = "" if column is None and windows is None: column_name = 'trix' @@ -487,7 +477,7 @@ def _get_tema(cls, df, column=None, windows=None): df[column_name] = 3 * single - 3 * double + triple @classmethod - def _get_wr(cls, df, window): + def _get_wr(cls, df, window=None): """ Williams Overbought/Oversold Index Definition: https://www.investopedia.com/terms/w/williamsr.asp @@ -500,11 +490,16 @@ def _get_wr(cls, df, window): :param window: number of periods :return: None """ + if window is None: + window = cls.WR + column_name = 'wr' + else: + column_name = 'wr_{}'.format(window) + window = cls.get_int_positive(window) ln = cls._mov_min(df['low'], window) hn = cls._mov_max(df['high'], window) - column_name = 'wr_{}'.format(window) df[column_name] = (hn - df['close']) / (hn - ln) * -100 @classmethod @@ -526,7 +521,7 @@ def _get_cci(cls, df, window=None): column_name = 'cci_{}'.format(window) window = cls.get_int_positive(window) - tp = cls._middle(df) + tp = cls._tp(df) tp_sma = cls._sma(tp, window) rolling = tp.rolling(min_periods=1, center=False, window=window) md = rolling.apply(lambda x: np.fabs(x - x.mean()).mean()) @@ -547,6 +542,8 @@ def _tr(cls, df): def _get_tr(cls, df): """ True Range of the trading + TR is a measure of volatility of a High-Low-Close series + tr = max[(high - low), abs(high - close_prev), abs(low - close_prev)] :param df: data :return: None @@ -579,7 +576,7 @@ def _get_atr(cls, df, window=None): @classmethod def _get_dma(cls, df): - """ Different of Moving Average + """ Difference of Moving 
Average default to 10 and 50. :param df: data @@ -723,7 +720,18 @@ def _get_kdj_default(cls, df): @classmethod def _get_cr(cls, df, window=26): - middle = cls._middle(df) + """ Energy Index (Intermediate Willingness Index) + + https://support.futunn.com/en/topic167/?lang=en-us + Use the relationship between the highest price, the lowest price and + yesterday's middle price to reflect the market's willingness to buy + and sell. + + :param df: data + :param window: window of the moving sum + :return: None + """ + middle = cls._tp(df) last_middle = cls._shift(middle, -1) ym = cls._shift(middle, -1) high = df['high'] @@ -732,23 +740,27 @@ def _get_cr(cls, df, window=26): p2_m = pd.concat((last_middle, low), axis=1).min(axis=1) p1 = cls._mov_sum(high - p1_m, window) p2 = cls._mov_sum(ym - p2_m, window) - df['cr'] = p1 / p2 * 100 - df['cr-ma1'] = cls._shifted_cr_sma(df, cls.CR_MA1) - df['cr-ma2'] = cls._shifted_cr_sma(df, cls.CR_MA2) - df['cr-ma3'] = cls._shifted_cr_sma(df, cls.CR_MA3) + df['cr'] = cr = p1 / p2 * 100 + df['cr-ma1'] = cls._shifted_cr_sma(cr, cls.CR_MA1) + df['cr-ma2'] = cls._shifted_cr_sma(cr, cls.CR_MA2) + df['cr-ma3'] = cls._shifted_cr_sma(cr, cls.CR_MA3) @classmethod - def _shifted_cr_sma(cls, df, window): - cr = cls._sma(df['cr'], window) - return cls._shift(cr, -int(window / 2.5 + 1)) + def _shifted_cr_sma(cls, cr, window): + cr_sma = cls._sma(cr, window) + return cls._shift(cr_sma, -int(window / 2.5 + 1)) @classmethod - def _middle(cls, df): + def _tp(cls, df): return (df['close'] + df['high'] + df['low']).divide(3.0) + @classmethod + def _get_tp(cls, df): + df['tp'] = cls._tp(df) + @classmethod def _get_middle(cls, df): - df['middle'] = cls._middle(df) + df['middle'] = cls._tp(df) @classmethod def _calc_kd(cls, column): @@ -904,7 +916,7 @@ def _get_macd(cls, df): def _get_ppo(cls, df): """ Percentage Price Oscillator - http://stockcharts.com/school/doku.php?id=chart_school:technical_indicators:price_oscillators_ppo + 
https://stockcharts.com/school/doku.php?id=chart_school:technical_indicators:price_oscillators_ppo Percentage Price Oscillator (PPO): {(12-day EMA - 26-day EMA)/26-day EMA} x 100 @@ -982,8 +994,7 @@ def _get_vwma(cls, df, window=None): column_name = 'vwma_{}'.format(window) window = cls.get_int_positive(window) - tp = df['middle'] - tpv = df['volume'] * tp + tpv = df['volume'] * cls._tp(df) rolling_tpv = cls._mov_sum(tpv, window) rolling_vol = cls._mov_sum(df['volume'], window) df[column_name] = rolling_tpv / rolling_vol @@ -1040,7 +1051,7 @@ def _get_mfi(cls, df, window=None): else: column_name = 'mfi_{}'.format(window) window = cls.get_int_positive(window) - middle = cls._middle(df) + middle = cls._tp(df) money_flow = (middle * df["volume"]).fillna(0.0) shifted = cls._shift(middle, -1) delta = (middle - shifted).fillna(0) @@ -1057,8 +1068,7 @@ def _get_mfi(cls, df, window=None): def _get_kama(cls, df, column, windows, fasts=None, slows=None): """ get Kaufman's Adaptive Moving Average. 
Implemented after - 'https://school.stockcharts.com/doku.php?id=technical_ - indicators:kaufman_s_adaptive_moving_average' + https://school.stockcharts.com/doku.php?id=technical_indicators:kaufman_s_adaptive_moving_average :param df: data :param column: column to calculate @@ -1169,6 +1179,7 @@ def __init_not_exist_column(cls, df, key): ('stochrsi',): cls._get_stochrsi, ('rate',): cls._get_rate, ('middle',): cls._get_middle, + ('tp',): cls._get_tp, ('boll', 'boll_ub', 'boll_lb'): cls._get_boll, ('macd', 'macds', 'macdh'): cls._get_macd, ('ppo', 'ppos', 'ppoh'): cls._get_ppo, @@ -1188,6 +1199,7 @@ def __init_not_exist_column(cls, df, key): ('log-ret',): cls._get_log_ret, ('mfi',): cls._get_mfi, ('wt1', 'wt2'): cls._get_wave_trend, + ('wr',): cls._get_wr, } for names, handler in handlers.items(): if key in names: @@ -1208,12 +1220,8 @@ def __init_not_exist_column(cls, df, key): getattr(cls, func_name)(df, c, r, s, f) elif len(ret) == 3: c, r, t = ret - if t in cls.OPERATORS: - # support all kinds of compare operators - cls._get_op(df, c, r, t) - else: - func_name = '_get_{}'.format(t) - getattr(cls, func_name)(df, c, r) + func_name = '_get_{}'.format(t) + getattr(cls, func_name)(df, c, r) elif len(ret) == 2: c, r = ret func_name = '_get_{}'.format(c) @@ -1233,8 +1241,7 @@ def __init_column(df, key): def __getitem__(self, item): try: - result = self.retype( - super(StockDataFrame, self).__getitem__(item)) + result = wrap(super(StockDataFrame, self).__getitem__(item)) except KeyError: try: if isinstance(item, list): @@ -1243,9 +1250,8 @@ def __getitem__(self, item): else: self.__init_column(self, item) except AttributeError: - log.exception('{} not found.'.format(item)) - result = self.retype( - super(StockDataFrame, self).__getitem__(item)) + pass + result = wrap(super(StockDataFrame, self).__getitem__(item)) return result def till(self, end_date): @@ -1258,7 +1264,7 @@ def within(self, start_date, end_date): return self.start_from(start_date).till(end_date) def 
copy(self, deep=True): - return self.retype(super(StockDataFrame, self).copy(deep)) + return wrap(super(StockDataFrame, self).copy(deep)) def _ensure_type(self, obj): """ override the method in pandas, omit the check diff --git a/test.py b/test.py index 153084d..1c45f26 100644 --- a/test.py +++ b/test.py @@ -69,7 +69,8 @@ def test_delta(self): assert_that(len(stock['volume_delta']), greater_than(1)) assert_that(stock.loc[20141219]['volume_delta'], equal_to(-63383600)) - def test_must_have_positive_int(self): + @staticmethod + def test_must_have_positive_int(): def do(): Sdf.get_int_positive("-54") @@ -82,18 +83,19 @@ def test_multiple_columns(self): def test_column_le_count(self): stock = self.get_stock_20day() - c = 'close_13.01_le_5_c' - stock.get(c) - assert_that(stock.loc[20110117][c], equal_to(1)) - assert_that(stock.loc[20110119][c], equal_to(3)) + stock['res'] = stock['close'] <= 13.01 + count = stock.get('res_5_c') + assert_that(count.loc[20110117], equal_to(1)) + assert_that(count.loc[20110119], equal_to(3)) def test_column_ge_future_count(self): stock = self.get_stock_20day() - c = stock['close_12.8_ge_5_fc'] - assert_that(c.loc[20110119], equal_to(1)) - assert_that(c.loc[20110117], equal_to(1)) - assert_that(c.loc[20110113], equal_to(3)) - assert_that(c.loc[20110111], equal_to(4)) + stock['res'] = stock['close'] >= 12.8 + count = stock['res_5_fc'] + assert_that(count.loc[20110119], equal_to(1)) + assert_that(count.loc[20110117], equal_to(1)) + assert_that(count.loc[20110113], equal_to(3)) + assert_that(count.loc[20110111], equal_to(4)) def test_column_delta(self): stock = self.get_stock_20day() @@ -126,10 +128,18 @@ def test_column_rate_plus2(self): assert_that(open_r.loc[20110119], equal_to(0.0)) assert_that(open_r.loc[20110120], equal_to(0.0)) + def test_change(self): + stock = self.get_stock_20day() + change = stock['change'] + assert_that(change.loc[20110107], near_to(4.4198)) + def test_middle(self): stock = self.get_stock_20day() middle = 
stock['middle'] - assert_that(middle.loc[20110104], near_to(12.53)) + tp = stock['tp'] + idx = 20110104 + assert_that(middle.loc[idx], near_to(12.53)) + assert_that(tp.loc[idx], equal_to(middle.loc[idx])) def test_cr(self): stock = self.get_stock_90day() @@ -375,14 +385,6 @@ def test_to_int_dedup(self): shifts = Sdf.to_ints('3, -3~-1, 5, -2~-1') assert_that(shifts, contains_exactly(-3, -2, -1, 3, 5)) - def test_to_floats(self): - floats = Sdf.to_floats('1.3, 4, -12.5, 4.0') - assert_that(floats, contains_exactly(-12.5, 1.3, 4)) - - def test_to_float(self): - number = Sdf.to_float('12.3') - assert_that(number, equal_to(12.3)) - def test_is_cross_columns(self): assert_that(Sdf.is_cross_columns('a_x_b'), equal_to(True)) assert_that(Sdf.is_cross_columns('a_xu_b'), equal_to(True)) @@ -406,9 +408,10 @@ def test_get_log_ret(self): stock.get('log-ret') assert_that(stock.loc[20110128]['log-ret'], near_to(-0.010972)) - def test_rsv_nan_value(self): - s = Sdf.retype(pd.read_csv(get_file('asml.as.csv'))) - df = Sdf.retype(s) + @staticmethod + def test_rsv_nan_value(): + s = wrap(pd.read_csv(get_file('asml.as.csv'))) + df = wrap(s) assert_that(df['rsv_9'][0], equal_to(0.0)) def test_get_rsi(self): @@ -434,11 +437,13 @@ def test_get_stoch_rsi(self): assert_that(stoch_rsi_14.loc[idx], near_to(stoch_rsi.loc[idx])) def test_get_wr(self): - self._supor.get('wr_10') - self._supor.get('wr_6') + wr = self._supor.get('wr') + wr_6 = self._supor.get('wr_6') + wr_14 = self._supor.get('wr_14') idx = 20160817 - assert_that(self._supor.loc[idx, 'wr_10'], near_to(-13.0573)) - assert_that(self._supor.loc[idx, 'wr_6'], near_to(-16.5322)) + assert_that(wr_14.loc[idx], near_to(-49.1620)) + assert_that(wr_6.loc[idx], near_to(-16.5322)) + assert_that(wr.loc[idx], equal_to(wr_14.loc[idx])) def test_get_cci(self): stock = self._supor.within(20160701, 20160831)