diff --git a/README.md b/README.md index cb70aa34..9fa7a793 100644 --- a/README.md +++ b/README.md @@ -223,6 +223,55 @@ data = yf.download("SPY AAPL", period="1mo") #### `yf.download()` and `Ticker.history()` have many options for configuring fetching and processing. [Review the Wiki](https://github.com/ranaroussi/yfinance/wiki) for more options and detail. +### Sector and Industry + +The `Sector` and `Industry` modules provide access to US market sector and industry information. + +To initialize, use the relevant sector or industry key as below. (The complete mapping of keys is available in `const.py`.) + +```python +import yfinance as yf + +tech = yf.Sector('technology') +software = yf.Industry('software-infrastructure') + +# Common information +tech.key +tech.name +tech.symbol +tech.ticker +tech.overview +tech.top_companies +tech.research_reports + +# Sector information +tech.top_etfs +tech.top_mutual_funds +tech.industries + +# Industry information +software.sector_key +software.sector_name +software.top_performing_companies +software.top_growth_companies +``` + +The modules can be chained with `Ticker` as shown below. +```python +import yfinance as yf + +# Ticker to Sector and Industry +msft = yf.Ticker('MSFT') +tech = yf.Sector(msft.info.get('sectorKey')) +software = yf.Industry(msft.info.get('industryKey')) + +# Sector and Industry to Ticker +tech_ticker = tech.ticker +tech_ticker.info +software_ticker = software.ticker +software_ticker.history() +``` + ### Logging `yfinance` now uses the `logging` module to handle messages, default behaviour is only print errors. If debugging, use `yf.enable_debug_mode()` to switch logging to debug with custom formatting. 
diff --git a/yfinance/__init__.py b/yfinance/__init__.py index 0270e009..38930e09 100644 --- a/yfinance/__init__.py +++ b/yfinance/__init__.py @@ -25,6 +25,8 @@ from .multi import download from .utils import enable_debug_mode from .cache import set_tz_cache_location +from .domain.sector import Sector +from .domain.industry import Industry __version__ = version.version __author__ = "Ran Aroussi" @@ -32,4 +34,4 @@ import warnings warnings.filterwarnings('default', category=DeprecationWarning, module='^yfinance') -__all__ = ['download', 'Ticker', 'Tickers', 'enable_debug_mode', 'set_tz_cache_location'] +__all__ = ['download', 'Ticker', 'Tickers', 'enable_debug_mode', 'set_tz_cache_location', 'Sector', 'Industry'] diff --git a/yfinance/const.py b/yfinance/const.py index b88558b8..9265397a 100644 --- a/yfinance/const.py +++ b/yfinance/const.py @@ -1,3 +1,4 @@ +_QUERY1_URL_ = 'https://query1.finance.yahoo.com' _BASE_URL_ = 'https://query2.finance.yahoo.com' _ROOT_URL_ = 'https://finance.yahoo.com' @@ -155,3 +156,152 @@ "recommendationTrend", "futuresChain", ) + +# map last updated as of 2024.09.18 +SECTOR_INDUSTY_MAPPING = { + 'basic-materials': {'specialty-chemicals', + 'gold', + 'building-materials', + 'copper', + 'steel', + 'agricultural-inputs', + 'chemicals', + 'other-industrial-metals-mining', + 'lumber-wood-production', + 'aluminum', + 'other-precious-metals-mining', + 'coking-coal', + 'paper-paper-products', + 'silver'}, + 'communication-services': {'internet-content-information', + 'telecom-services', + 'entertainment', + 'electronic-gaming-multimedia', + 'advertising-agencies', + 'broadcasting', + 'publishing'}, + 'consumer-cyclical': {'internet-retail', + 'auto-manufacturers', + 'restaurants', + 'home-improvement-retail', + 'travel-services', + 'specialty-retail', + 'apparel-retail', + 'residential-construction', + 'footwear-accessories', + 'packaging-containers', + 'lodging', + 'auto-parts', + 'auto-truck-dealerships', + 'gambling', + 'resorts-casinos', + 
'leisure', + 'apparel-manufacturing', + 'personal-services', + 'furnishings-fixtures-appliances', + 'recreational-vehicles', + 'luxury-goods', + 'department-stores', + 'textile-manufacturing'}, + 'consumer-defensive': {'discount-stores', + 'beverages-non-alcoholic', + 'household-personal-products', + 'packaged-foods', + 'tobacco', + 'confectioners', + 'farm-products', + 'food-distribution', + 'grocery-stores', + 'beverages-brewers', + 'education-training-services', + 'beverages-wineries-distilleries'}, + 'energy': {'oil-gas-integrated', + 'oil-gas-midstream', + 'oil-gas-e-p', + 'oil-gas-equipment-services', + 'oil-gas-refining-marketing', + 'uranium', + 'oil-gas-drilling', + 'thermal-coal'}, + 'financial-services': {'banks-diversified', + 'credit-services', + 'asset-management', + 'insurance-diversified', + 'banks-regional', + 'capital-markets', + 'financial-data-stock-exchanges', + 'insurance-property-casualty', + 'insurance-brokers', + 'insurance-life', + 'insurance-specialty', + 'mortgage-finance', + 'insurance-reinsurance', + 'shell-companies', + 'financial-conglomerates'}, + 'healthcare': {'drug-manufacturers-general', + 'healthcare-plans', + 'biotechnology', + 'medical-devices', + 'diagnostics-research', + 'medical-instruments-supplies', + 'medical-care-facilities', + 'drug-manufacturers-specialty-generic', + 'health-information-services', + 'medical-distribution', + 'pharmaceutical-retailers'}, + 'industrials': {'aerospace-defense', + 'specialty-industrial-machinery', + 'railroads', + 'building-products-equipment', + 'farm-heavy-construction-machinery', + 'specialty-business-services', + 'integrated-freight-logistics', + 'waste-management', + 'conglomerates', + 'industrial-distribution', + 'engineering-construction', + 'rental-leasing-services', + 'consulting-services', + 'trucking', + 'electrical-equipment-parts', + 'airlines', + 'tools-accessories', + 'pollution-treatment-controls', + 'security-protection-services', + 'marine-shipping', + 
class Domain:
    """Base class for the data shared by ``Sector`` and ``Industry`` lookups.

    Nothing is requested at construction time. The first read of a data
    property calls ``_fetch_and_parse()``, which subclasses must implement and
    which is expected to populate the common fields through
    ``_parse_and_assign_common()``.

    Args:
        key: Sector or industry key; exposed unchanged through ``key``.
        session: Optional session object handed to ``YfData``.
        proxy: Optional proxy setting, stored on the instance so subclasses
            can pass it to ``_fetch()``.
    """

    def __init__(self, key: str, session=None, proxy=None):
        self._key: str = key
        self.proxy = proxy
        self.session = session
        self._data: YfData = YfData(session=session)

        # Becomes True once _parse_and_assign_common() has completed. It lets
        # _ensure_fetched() distinguish "not loaded yet" from "loaded, but this
        # field is genuinely None", so such fields do not trigger a new
        # request on every property read.
        self._fetched: bool = False

        self._name: Optional[str] = None
        self._symbol: Optional[str] = None
        self._overview: Optional[Dict] = None
        self._top_companies: Optional[_pd.DataFrame] = None
        self._research_reports: Optional[List[Dict[str, str]]] = None

    @property
    def key(self) -> str:
        """Key this object was created with."""
        return self._key

    @property
    def name(self) -> Optional[str]:
        """Value of ``name`` in the response, or None if unavailable."""
        self._ensure_fetched(self._name)
        return self._name

    @property
    def symbol(self) -> Optional[str]:
        """Value of ``symbol`` in the response, or None if unavailable."""
        self._ensure_fetched(self._symbol)
        return self._symbol

    @property
    def ticker(self) -> "Ticker":
        """A ``Ticker`` constructed from the fetched ``symbol``."""
        self._ensure_fetched(self._symbol)
        # NOTE(review): session/proxy are not forwarded to Ticker here --
        # confirm whether the Ticker constructor should receive them.
        return Ticker(self._symbol)

    @property
    def overview(self) -> Optional[Dict]:
        """Overview dict produced by ``_parse_overview()``, or None if unavailable."""
        self._ensure_fetched(self._overview)
        return self._overview

    @property
    def top_companies(self) -> Optional[_pd.DataFrame]:
        """Symbol-indexed DataFrame of top companies, or None when there are none."""
        self._ensure_fetched(self._top_companies)
        return self._top_companies

    @property
    def research_reports(self) -> Optional[List[Dict[str, str]]]:
        """Value of ``researchReports`` in the response, or None if unavailable."""
        self._ensure_fetched(self._research_reports)
        return self._research_reports

    def _fetch(self, query_url, proxy) -> Dict:
        """Request ``query_url`` and return what ``YfData.get_raw_json`` returns."""
        params_dict = {"formatted": "true", "withReturns": "true", "lang": "en-US", "region": "US"}
        return self._data.get_raw_json(
            query_url,
            user_agent_headers=self._data.user_agent_headers,
            params=params_dict,
            proxy=proxy,
        )

    def _parse_and_assign_common(self, data) -> None:
        """Populate the fields shared by all subclasses from the ``data`` mapping.

        Marks the object as fetched only after every common field has been
        parsed, so an exception raised part-way leaves the object eligible
        for another attempt.
        """
        self._name = data.get('name')
        self._symbol = data.get('symbol')
        self._overview = self._parse_overview(data.get('overview', {}))
        self._top_companies = self._parse_top_companies(data.get('topCompanies', {}))
        self._research_reports = data.get('researchReports')
        self._fetched = True

    @staticmethod
    def _raw(container, field):
        """Return ``container[field]['raw']``, or None if either level is missing or null."""
        nested = container.get(field) or {}
        return nested.get('raw', None)

    def _parse_overview(self, overview) -> Dict:
        """Flatten the ``overview`` mapping into a dict with snake_case keys.

        Missing or null entries become None; a null ``overview`` is treated
        as empty.
        """
        overview = overview or {}
        return {
            "companies_count": overview.get('companiesCount', None),
            "market_cap": self._raw(overview, 'marketCap'),
            "message_board_id": overview.get('messageBoardId', None),
            "description": overview.get('description', None),
            "industries_count": overview.get('industriesCount', None),
            "market_weight": self._raw(overview, 'marketWeight'),
            "employee_count": self._raw(overview, 'employeeCount'),
        }

    def _parse_top_companies(self, top_companies) -> Optional[_pd.DataFrame]:
        """Convert the ``topCompanies`` entries into a DataFrame indexed by symbol.

        Returns None when ``top_companies`` is empty or null.
        """
        top_companies_column = ['symbol', 'name', 'rating', 'market weight']
        top_companies_values = [
            (c.get('symbol'), c.get('name'), c.get('rating'), self._raw(c, 'marketWeight'))
            for c in (top_companies or [])
        ]

        if not top_companies_values:
            return None

        return _pd.DataFrame(top_companies_values, columns=top_companies_column).set_index('symbol')

    def _fetch_and_parse(self) -> None:
        """Fetch and parse the data for this object; must be overridden by subclasses."""
        raise NotImplementedError("_fetch_and_parse() needs to be implemented by children classes")

    def _ensure_fetched(self, attribute) -> None:
        """Trigger ``_fetch_and_parse()`` if ``attribute`` is None and no data has been parsed yet.

        After a successful parse a None attribute is taken at face value, so
        reading an absent field does not issue another request. If no parse
        has completed (for example the fetch failed), the next read tries again.
        """
        if attribute is None and not getattr(self, '_fetched', False):
            self._fetch_and_parse()
class Industry(Domain):
    """Industry-level market data, loaded lazily from the industries endpoint.

    Args:
        key: Industry key, e.g. ``'software-infrastructure'``.
        session: Optional session object passed through to ``Domain``.
        proxy: Optional proxy setting passed through to ``Domain``.
    """

    def __init__(self, key, session=None, proxy=None):
        super().__init__(key, session, proxy)
        self._query_url = f'{_QUERY_URL_}/industries/{self._key}'

        self._sector_key = None
        self._sector_name = None
        self._top_performing_companies = None
        self._top_growth_companies = None

    def __repr__(self):
        return f'yfinance.Industry object <{self._key}>'

    @property
    def sector_key(self) -> Optional[str]:
        """Value of ``sectorKey`` in the response, or None if unavailable."""
        self._ensure_fetched(self._sector_key)
        return self._sector_key

    @property
    def sector_name(self) -> Optional[str]:
        """Value of ``sectorName`` in the response, or None if unavailable."""
        self._ensure_fetched(self._sector_name)
        return self._sector_name

    @property
    def top_performing_companies(self) -> Optional[_pd.DataFrame]:
        """Symbol-indexed DataFrame of top performing companies, or None when there are none."""
        self._ensure_fetched(self._top_performing_companies)
        return self._top_performing_companies

    @property
    def top_growth_companies(self) -> Optional[_pd.DataFrame]:
        """Symbol-indexed DataFrame of top growth companies, or None when there are none."""
        self._ensure_fetched(self._top_growth_companies)
        return self._top_growth_companies

    @staticmethod
    def _raw(entry, field):
        """Return ``entry[field]['raw']``, or None if either level is missing or null."""
        nested = entry.get(field) or {}
        return nested.get('raw', None)

    @staticmethod
    def _to_frame(rows, columns) -> Optional[_pd.DataFrame]:
        """Build a DataFrame indexed by ``symbol`` from row tuples; None when ``rows`` is empty."""
        if not rows:
            return None
        return _pd.DataFrame(rows, columns=columns).set_index('symbol')

    def _parse_top_performing_companies(self, top_performing_companies: Optional[list]) -> Optional[_pd.DataFrame]:
        """Convert the ``topPerformingCompanies`` entries into a DataFrame.

        Columns are ``name``, ``ytd return``, ``last price`` and
        ``target price``, indexed by ``symbol``. Returns None when the input
        is empty or None.
        """
        companies_column = ['symbol', 'name', 'ytd return', 'last price', 'target price']
        companies_values = [
            (
                c.get('symbol', None),
                c.get('name', None),
                self._raw(c, 'ytdReturn'),
                self._raw(c, 'lastPrice'),
                self._raw(c, 'targetPrice'),
            )
            # The key may be absent from the response, in which case None is passed in.
            for c in (top_performing_companies or [])
        ]
        return self._to_frame(companies_values, companies_column)

    def _parse_top_growth_companies(self, top_growth_companies: Optional[list]) -> Optional[_pd.DataFrame]:
        """Convert the ``topGrowthCompanies`` entries into a DataFrame.

        Columns are ``name``, ``ytd return`` and ``growth estimate``, indexed
        by ``symbol``. Returns None when the input is empty or None.
        """
        companies_column = ['symbol', 'name', 'ytd return', 'growth estimate']
        companies_values = [
            (
                c.get('symbol', None),
                c.get('name', None),
                self._raw(c, 'ytdReturn'),
                self._raw(c, 'growthEstimate'),
            )
            for c in (top_growth_companies or [])
        ]
        return self._to_frame(companies_values, companies_column)

    def _fetch_and_parse(self) -> None:
        """Fetch the industry payload and populate the common and industry-specific fields.

        Any exception is logged rather than raised; fields that were not
        reached keep their previous values.
        """
        result = None

        try:
            result = self._fetch(self._query_url, self.proxy)
            data = result['data']
            self._parse_and_assign_common(data)

            self._sector_key = data.get('sectorKey')
            self._sector_name = data.get('sectorName')
            self._top_performing_companies = self._parse_top_performing_companies(
                data.get('topPerformingCompanies'))
            self._top_growth_companies = self._parse_top_growth_companies(
                data.get('topGrowthCompanies'))
        except Exception as e:
            # Best-effort by design: report the failure and keep the object usable.
            logger = utils.get_yf_logger()
            logger.error(f"Failed to get industry data for '{self._key}' reason: {e}")
            logger.debug("Got response: ")
            logger.debug("-------------")
            logger.debug(f" {result}")
            logger.debug("-------------")
class Sector(Domain):
    """Sector-level market data, loaded lazily from the sectors endpoint.

    Args:
        key: Sector key, e.g. ``'technology'``.
        session: Optional session object passed through to ``Domain``.
        proxy: Optional proxy setting passed through to ``Domain``.
    """

    def __init__(self, key, session=None, proxy=None):
        super().__init__(key, session, proxy)
        self._query_url: str = f'{_QUERY_URL_}/sectors/{self._key}'

        # Filled in by _fetch_and_parse() on first property access.
        self._top_etfs: Optional[Dict] = None
        self._top_mutual_funds: Optional[Dict] = None
        self._industries: Optional[_pd.DataFrame] = None

    def __repr__(self):
        return f'yfinance.Sector object <{self._key}>'

    @property
    def top_etfs(self) -> Dict[str, str]:
        """Mapping of ETF symbol to name."""
        self._ensure_fetched(self._top_etfs)
        return self._top_etfs

    @property
    def top_mutual_funds(self) -> Dict[str, str]:
        """Mapping of mutual fund symbol to name."""
        self._ensure_fetched(self._top_mutual_funds)
        return self._top_mutual_funds

    @property
    def industries(self) -> _pd.DataFrame:
        """DataFrame of the sector's industries, indexed by industry key."""
        self._ensure_fetched(self._industries)
        return self._industries

    def _parse_top_etfs(self, top_etfs: Dict) -> Dict[str, str]:
        """Map each entry's ``symbol`` to its ``name``."""
        symbol_to_name = {}
        for fund in top_etfs:
            symbol_to_name[fund.get('symbol')] = fund.get('name')
        return symbol_to_name

    def _parse_top_mutual_funds(self, top_mutual_funds: Dict) -> Dict[str, str]:
        """Map each entry's ``symbol`` to its ``name``."""
        symbol_to_name = {}
        for fund in top_mutual_funds:
            symbol_to_name[fund.get('symbol')] = fund.get('name')
        return symbol_to_name

    def _parse_industries(self, industries: Dict) -> _pd.DataFrame:
        """Build a key-indexed DataFrame of industries.

        The entry whose name is ``'All Industries'`` is left out. Columns are
        ``name``, ``symbol`` and ``market weight``.
        """
        rows = []
        for entry in industries:
            if entry.get('name') == 'All Industries':
                continue
            rows.append((
                entry.get('key'),
                entry.get('name'),
                entry.get('symbol'),
                entry.get('marketWeight', {}).get('raw', None),
            ))
        frame = _pd.DataFrame(rows, columns=['key', 'name', 'symbol', 'market weight'])
        return frame.set_index('key')

    def _fetch_and_parse(self) -> None:
        """Fetch the sector payload and populate the common and sector-specific fields.

        Any exception is logged rather than raised.
        """
        result = None

        try:
            result = self._fetch(self._query_url, self.proxy)
            data = result['data']
            self._parse_and_assign_common(data)

            self._top_etfs = self._parse_top_etfs(data.get('topETFs', {}))
            self._top_mutual_funds = self._parse_top_mutual_funds(data.get('topMutualFunds', {}))
            self._industries = self._parse_industries(data.get('industries', {}))

        except Exception as e:
            logger = utils.get_yf_logger()
            logger.error(f"Failed to get sector data for '{self._key}' reason: {e}")
            # Dump the raw response (None if the request itself failed) at debug level.
            for debug_line in ("Got response: ", "-------------", f" {result}", "-------------"):
                logger.debug(debug_line)