简体   繁体   中英

Using regex or BeautifulSoup to find a word or number after a keyword

I'm trying to scrape Bloomberg so that I can get the data as a concise list. The data looks like this:

    <script type="text/javascript">
var ClientApp = require('app/ClientApp');
var clientApp = new ClientApp();
clientApp.start({
environmentConfig: {"appRoot":"","assetManifest":{"public/images/marketdata-quoteshare-image.png":"//assets.bwbx.io/markets/public/images/marketdata-quoteshare-image.31c2f976.png","public/javascripts/application.js":"//assets.bwbx.io/markets/public/javascripts/application.72f7c0c6.js","public/stylesheets/application.css":"//assets.bwbx.io/markets/public/stylesheets/application.922ef728.css","public/stylesheets/when_than.css":"//assets.bwbx.io/markets/public/stylesheets/when_than.d41d8cd9.css"},"clientApp":"app/ClientApp","searchConfig":{"host":"http://search.bloomberg.com"},"sparklineEndpoint":"http://www.bloomberg.com/api/intraday/sparkline/","tracking":{"chartbeat":{"domain":"bloomberg.com","uid":"15087"},"comscore":{"clientId":"3005059","site":"bloomberg","siteIdentifier":"004"},"googleAnalytics":{"gaAccount":"UA-11413116-1"},"personalization":{"site":"","trackingUrl":"http://tracking.bloomberg.com/images/tracking-pixel.gif"}},"featureFlags":{"renderChartsOnClient":true},"watchlist":{"watchlistUrl":"http://www.bloomberg.com/markets/watchlist","sslProxyUrl":"https://login.bloomberg.com/watchlist/markets/watchlist/proxy"},"brisket:wantsCookies":false},
bootstrappedData: {"/markets/api/quote-page/IBM%3AUS":{"securityType":"COMMON_STOCK","basicQuote":{"name":"International Business Machines Corp","primaryExchange":"New York","price":169.05,"issuedCurrency":"USD","priceChange1Day":1.32,"percentChange1Day":0.78697902,"nyPriceTime":"14:00:33.000","nyPriceDate":"2015-06-23","priceMinDecimals":2,"nyTradeStartTime":"09:30:00.000","nyTradeEndTime":"16:30:00.000","timeZoneOffset":-4,"id":"IBM:US"},"detailedQuote":{"priceEarningsRatio":10.05077193,"earningsPerShare":16.780801,"priceMinDecimals":2,"fundamentalDataCurrency":"USD","marketCap":166085247740.22,"issuedCurrency":"USD","sharesOutstanding":984734100,"priceToSalesRatio":1.85452247,"indicatedGrossDividendYield":3.0830343,"bicsSector":"Technology","bicsIndustry":"Technology Services","bicsSubIndustry":"IT Services","openPrice":168.38,"lowPrice":167.573,"highPrice":169.97,"volume":2346263,"previousClosingPriceOneTradingDayAgo":167.73,"lowPrice52Week":149.52,"highPrice52Week":196.4,"totalReturn1Year":-4.657826,"id":"IBM:US"},"priceTimeSeries":[{"id":"IBM:US","price":[{"dateTime":"2015-06-23T13:30:00.000Z","value":167.72},{"dateTime":"2015-06-23T13:35:00.000Z","value":167.89},{"dateTime":"2015-06-23T13:40:00.000Z","value":167.882},{"dateTime":"2015-06-23T13:45:00.000Z","value":168.54},{"dateTime":"2015-06-23T13:50:00.000Z","value":168.97},{"dateTime":"2015-06-23T13:55:00.000Z","value":169.34},{"dateTime":"2015-06-23T14:00:00.000Z","value":169.77},{"dateTime":"2015-06-23T14:05:00.000Z","value":169.71},{"dateTime":"2015-06-23T14:10:00.000Z","value":169.63},{"dateTime":"2015-06-23T14:15:00.000Z","value":169.26},{"dateTime":"2015-06-23T14:20:00.000Z","value":169.29},{"dateTime":"2015-06-23T14:25:00.000Z","value":169.23},{"dateTime":"2015-06-23T14:30:00.000Z","value":169.2},{"dateTime":"2015-06-23T14:35:00.000Z","value":169.04},{"dateTime":"2015-06-23T14:40:00.000Z","value":168.99},{"dateTime":"2015-06-23T14:45:00.000Z","value":169.06},{"dateTime":"2015-06-23T14:50:00.000Z","va
lue":169.14},{"dateTime":"2015-06-23T14:55:00.000Z","value":169.01},{"dateTime":"2015-06-23T15:00:00.000Z","value":169.02},{"dateTime":"2015-06-23T15:05:00.000Z","value":169.225},{"dateTime":"2015-06-23T15:10:00.000Z","value":169.14},{"dateTime":"2015-06-23T15:15:00.000Z","value":169.08},{"dateTime":"2015-06-23T15:20:00.000Z","value":168.86},{"dateTime":"2015-06-23T15:25:00.000Z","value":168.66},{"dateTime":"2015-06-23T15:30:00.000Z","value":168.69},{"dateTime":"2015-06-23T15:35:00.000Z","value":168.79},{"dateTime":"2015-06-23T15:40:00.000Z","value":168.8384},{"dateTime":"2015-06-23T15:45:00.000Z","value":168.83},{"dateTime":"2015-06-23T15:50:00.000Z","value":168.78},{"dateTime":"2015-06-23T15:55:00.000Z","value":168.69},{"dateTime":"2015-06-23T16:00:00.000Z","value":168.66},{"dateTime":"2015-06-23T16:05:00.000Z","value":168.54},{"dateTime":"2015-06-23T16:10:00.000Z","value":168.49},{"dateTime":"2015-06-23T16:15:00.000Z","value":168.5351},{"dateTime":"2015-06-23T16:20:00.000Z","value":168.499},{"dateTime":"2015-06-23T16:25:00.000Z","value":168.78},{"dateTime":"2015-06-23T16:30:00.000Z","value":168.77},{"dateTime":"2015-06-23T16:35:00.000Z","value":168.755},{"dateTime":"2015-06-23T16:40:00.000Z","value":168.7},{"dateTime":"2015-06-23T16:45:00.000Z","value":168.7},{"dateTime":"2015-06-23T16:50:00.000Z","value":168.64},{"dateTime":"2015-06-23T16:55:00.000Z","value":168.64},{"dateTime":"2015-06-23T17:00:00.000Z","value":168.74},{"dateTime":"2015-06-23T17:05:00.000Z","value":168.69},{"dateTime":"2015-06-23T17:10:00.000Z","value":168.77},{"dateTime":"2015-06-23T17:15:00.000Z","value":168.75},{"dateTime":"2015-06-23T17:20:00.000Z","value":168.77},{"dateTime":"2015-06-23T17:25:00.000Z","value":168.72},{"dateTime":"2015-06-23T17:30:00.000Z","value":168.79},{"dateTime":"2015-06-23T17:35:00.000Z","value":168.85},{"dateTime":"2015-06-23T17:40:00.000Z","value":169.02},{"dateTime":"2015-06-23T17:45:00.000Z","value":168.975},{"dateTime":"2015-06-23T17:50:00.000Z","value":169.01},{
"dateTime":"2015-06-23T17:55:00.000Z","value":169.03},{"dateTime":"2015-06-23T18:00:00.000Z","value":169.04}],"dateTimeRanges":{"start":"2015-06-23T13:30:00Z","end":"2015-06-23T20:30:00Z"},"timeZoneOffset":-4,"nyTradeStartTime":"09:30:00.000","nyTradeEndTime":"16:30:00.000","priceMinDecimals":2,"previousClosingPriceOneTradingDayAgo":167.73}],"marketStatus":{"marketStatus":"ACTV","ultimateParentTicker":null,"parentMarketStatus":null,"id":"IBM:US"},"companyNews":{"news":[{"id":"NQD4C46JIJUT01","headline":"IBM’s Rometty Raises Profile for Women at Sun Valley Confab","publishedAt":"2015-06-22T22:50:36-04:00","url":"http://www.bloomberg.com/news/articles/2015-06-22/ibm-s-rometty-raises-profile-for-women-at-sun-valley-conference","primaryCategory":"technology","publishedAtDisplay":"Jun 22, 2015","personalizationTrackingMetaData":"data-resource-type=\"article\" data-resource-id=\"NQD4C46JIJUT01\""},{"id":"NQ7N3CSYF02F01","headline":"Mellanox Battles Intel Aided by Mentalist Who Wowed Kardashian","publishedAt":"2015-06-22T10:04:37-04:00","url":"http://www.bloomberg.com/news/articles/2015-06-20/upstart-battles-intel-with-aid-of-mentalist-who-wowed-kardashian","primaryCategory":"technology","publishedAtDisplay":"Jun 22, 2015","personalizationTrackingMetaData":"data-resource-type=\"article\" data-resource-id=\"NQ7N3CSYF02F01\""},{"id":"NQ76QW6KLVRB01","headline":"EBay Sells Back 28.4% Stake in Craigslist, Ending Litigation","publishedAt":"2015-06-19T16:16:03-04:00","url":"http://www.bloomberg.com/news/articles/2015-06-19/ebay-sells-back-28-4-stake-in-craigslist-ending-litigation","primaryCategory":"technology","publishedAtDisplay":"Jun 19, 2015","personalizationTrackingMetaData":"data-resource-type=\"article\" data-resource-id=\"NQ76QW6KLVRB01\""},{"id":"NQ3NDSSYF01T01","headline":"Oracle Profit, Sales Miss 
Estimates","publishedAt":"2015-06-18T16:09:56-04:00","url":"http://www.bloomberg.com/news/articles/2015-06-17/oracle-profit-sales-miss-estimates-on-currency-fluctuations","primaryCategory":"technology","publishedAtDisplay":"Jun 18, 2015","personalizationTrackingMetaData":"data-resource-type=\"article\" data-resource-id=\"NQ3NDSSYF01T01\""},{"id":"NQ5GTO6JTSEB01","headline":"1985: The Year Hollywood Discovered Nerds Are Cool","publishedAt":"2015-06-18T13:05:48-04:00","url":"http://www.bloomberg.com/news/articles/2015-06-18/1985-the-year-hollywood-discovered-nerds-are-cool","primaryCategory":"culture","publishedAtDisplay":"Jun 18, 2015","personalizationTrackingMetaData":"data-resource-type=\"article\" data-resource-id=\"NQ5GTO6JTSEB01\""},{"id":"NQ573I6VDKHV01","headline":"Wimbledon Follows U.S. Open Golf in Banning Live Video Streaming","publishedAt":"2015-06-18T10:03:01-04:00","url":"http://www.bloomberg.com/news/articles/2015-06-18/wimbledon-follows-u-s-open-golf-in-banning-live-video-streaming","primaryCategory":"industries","publishedAtDisplay":"Jun 18, 2015","personalizationTrackingMetaData":"data-resource-type=\"article\" data-resource-id=\"NQ573I6VDKHV01\""},{"id":"NOR89C6JTSEE01","headline":"Microsoft Faces Russian Hurdles as Local Software Thrives","publishedAt":"2015-06-17T11:06:39-04:00","url":"http://www.bloomberg.com/news/articles/2015-06-17/microsoft-oracle-face-russian-hurdles-as-local-software-thrives","primaryCategory":"technology","publishedAtDisplay":"Jun 17, 2015","personalizationTrackingMetaData":"data-resource-type=\"article\" data-resource-id=\"NOR89C6JTSEE01\""},{"id":"NPUOUYSYF02E01","headline":"Trash-Talking Rival CEO Vows to Beat IBM on Cybersecurity Deals","publishedAt":"2015-06-15T10:18:00-04:00","url":"http://www.bloomberg.com/news/articles/2015-06-13/trash-talking-rival-ceo-vows-to-beat-ibm-on-cybersecurity-deals","primaryCategory":"technology","publishedAtDisplay":"Jun 15, 
2015","personalizationTrackingMetaData":"data-resource-type=\"article\" data-resource-id=\"NPUOUYSYF02E01\""},{"id":"NPU7FGSYF01S01","headline":"London Tech Week Seeks to Boost U.S. Investment in Startups","publishedAt":"2015-06-15T05:34:23-04:00","url":"http://www.bloomberg.com/news/articles/2015-06-15/london-tech-week-seeks-to-boost-800-million-of-u-s-investment","primaryCategory":"technology","publishedAtDisplay":"Jun 15, 2015","personalizationTrackingMetaData":"data-resource-type=\"article\" data-resource-id=\"NPU7FGSYF01S01\""},{"id":"NPUEXTSYF01T01","headline":"IBM to Invest ‘Hundreds of Millions’ in Free Data Technology","publishedAt":"2015-06-15T00:01:00-04:00","url":"http://www.bloomberg.com/news/articles/2015-06-15/ibm-to-invest-hundreds-of-millions-in-free-data-technology-iaxdikxe","primaryCategory":"technology","publishedAtDisplay":"Jun 15, 2015","personalizationTrackingMetaData":"data-resource-type=\"article\" data-resource-id=\"NPUEXTSYF01T01\""}],"id":"IBM:US"},"companyVideos":{"news":[{"id":"NQ0CU96JIJUT","headline":"Alibaba's Netflix Strategy: Bloomberg West (06/15)","publishedAt":"2015-06-15T22:51:45.396Z","url":"http://www.bloomberg.com/news/videos/2015-06-15/alibaba-s-netflix-strategy-bloomberg-west-06-15-","primaryCategory":"technology","thumbnail":"http://www.bloomberg.com/image/ijBRVETIQSyc.jpg","duration":"22:39"},{"id":"NQ08S46K50XT","headline":"Why IBM Wants to Bring Spark Mainstream","publishedAt":"2015-06-15T21:24:04.605Z","url":"http://www.bloomberg.com/news/videos/2015-06-15/why-ibm-wants-to-bring-spark-mainstream","primaryCategory":"technology","thumbnail":"http://www.bloomberg.com/image/iHtcdxtkdyr8.jpg","duration":"4:12"},{"id":"NPZHIG6TTDSB","headline":"Coming Next in Business: The On-Demand Sales 
Force","publishedAt":"2015-06-15T11:35:04.804Z","url":"http://www.bloomberg.com/news/videos/2015-06-15/coming-next-in-business-the-on-demand-sales-force","primaryCategory":"technology","thumbnail":"http://www.bloomberg.com/image/i8g9QeyWJyjE.jpg","duration":"2:38"}],"id":"IBM:US"},"pressReleases":{"pressReleases":[{"id":"a00WmfzBs93w","headline":"IBM Studio - Groningen to Create Data-Driven Digital Experiences for Clients ","publishedAt":"2015-06-23T11:00:05-04:00","publishedAtDisplay":"3 hours ago","url":"http://bloomberg.com/article/2015-06-23/a00WmfzBs93w.html"},{"id":"aoJ6HOi9D7I4","headline":"IBM Readies Brands to Capture New Sales Opportunities Driven By Unexpected Spikes in Traffic ","publishedAt":"2015-06-23T10:00:05-04:00","publishedAtDisplay":"10:00 AM","url":"http://bloomberg.com/article/2015-06-23/aoJ6HOi9D7I4.html"},{"id":"azp4qWIDSEn4","headline":"IBM and Bon Appetit Serve Up Chef Watson for All ","publishedAt":"2015-06-23T09:00:10-04:00","publishedAtDisplay":"9:00 AM","url":"http://bloomberg.com/article/2015-06-23/azp4qWIDSEn4.html"},{"id":"adGtRAm_6qXY","headline":"IBM Delivers Docker Based Container Services for Enterprise Cloud Application Developers ","publishedAt":"2015-06-22T13:00:03-04:00","publishedAtDisplay":"Jun 22, 2015","url":"http://bloomberg.com/article/2015-06-22/adGtRAm_6qXY.html"},{"id":"adGbY6bR6dRY","headline":"Photo Release -- Nidec and IBM Japan to Jointly Develop IoT Technology for Better Production Rate and Shorter Downtime of ","publishedAt":"2015-06-22T02:30:07-04:00","publishedAtDisplay":"Jun 22, 2015","url":"http://bloomberg.com/article/2015-06-22/adGbY6bR6dRY.html"},{"id":"aPlFLiFFVbOk","headline":"As Hurricane Season Approaches, IBM and The Weather Company Collaborate on Emergency Management for Cities ","publishedAt":"2015-06-22T00:01:02-04:00","publishedAtDisplay":"Jun 22, 2015","url":"http://bloomberg.com/article/2015-06-22/aPlFLiFFVbOk.html"},{"id":"aGWiLanx93X0","headline":"IBM Brings One of the World's Largest 
Networks of More Than a Billion Law Enforcement Shareable Documents to the Cloud ","publishedAt":"2015-06-22T00:01:01-04:00","publishedAtDisplay":"Jun 22, 2015","url":"http://bloomberg.com/article/2015-06-22/aGWiLanx93X0.html"},{"id":"aMn.1xd9pm60","headline":"IBM and Sogeti Forge Alliance to Accelerate Hybrid Cloud Development ","publishedAt":"2015-06-18T11:40:01-04:00","publishedAtDisplay":"Jun 18, 2015","url":"http://bloomberg.com/article/2015-06-18/aMn.1xd9pm60.html"},{"id":"aRvHe.JJK3jo","headline":"Wimbledon and IBM Push Digital Boundaries to Enhance Fan and Player Engagement ","publishedAt":"2015-06-18T09:00:06-04:00","publishedAtDisplay":"Jun 18, 2015","url":"http://bloomberg.com/article/2015-06-18/aRvHe.JJK3jo.html"},{"id":"aDiABLcYzRNM","headline":"ZeroTurnaround Integrates With Solutions From IBM, Red Hat, SAP and AWS to Simplify Development and Testing in the Cloud ","publishedAt":"2015-06-18T08:50:00-04:00","publishedAtDisplay":"Jun 18, 2015","url":"http://bloomberg.com/article/2015-06-18/aDiABLcYzRNM.html"}],"id":"IBM:US"},"boardMembers":{"boardMembers":[],"count":14,"id":"IBM:US"},"executives":{"executives":[{"id":"3970902","name":"Virginia M Rometty \"Ginni\"","title":"Chairman/President/CEO","slug":"/3970902-virginia-m-rometty"},{"id":"3320296","name":"Steven A Mills \"Steve\"","title":"Exec VP:Software & Systems","slug":"/3320296-steven-a-mills"},{"id":"15091767","name":"Martin J Schroeter","title":"Senior VP/CFO","slug":"/15091767-martin-j-schroeter"},{"id":"19076604","name":"Arvind Krishna","title":"Senior VP/Dir:Research","slug":"/19076604-arvind-krishna"},{"id":"1857778","name":"John E Kelly","title":"Senior VP:Solutions Portfolio","slug":"/1857778-john-e-kelly"}],"count":22,"id":"IBM:US"},"profile":{"description":"International Business Machines Corporation (IBM) provides computer solutions through the use of advanced information technology. The Company's solutions include technologies, systems, products, services, software, and financing. 
IBM offers its products through its global sales and distribution organization, as well as through a variety of third party distributors and resellers.","address":["One New Orchard Road","Armonk, NY 10504-1722","United States"],"phone":"1-914-499-1900","website":"www.ibm.com","id":"IBM:US"},"chartDefaultTimeFrame":"1_DAY"}}
});

How could I use Beautiful Soup or the json module to get the data under bootstrappedData so that it looks something like this:

primaryExchange: NASDAQ GS
price: 40.6 ...

The elements like primaryExchange, price, etc. don't change, but I want to get what's after the colon, which changes for each company.

Here's what I have so far:

import re
import urllib2
import requests
from bs4 import BeautifulSoup

def scrape():
    """Prompt for a ticker and split its Bloomberg quote page at "securityType".

    Reads a ticker symbol from standard input, downloads
    http://www.bloomberg.com/quote/<ticker>:US and partitions the page
    source around the first occurrence of the string "securityType".

    Returns:
        The 3-tuple produced by str.partition:
        (text_before, "securityType", text_after). When the marker does not
        occur in the page, str.partition yields (whole_page, "", "").
    """
    # Strip stray whitespace so it does not end up inside the URL.
    ticker = raw_input("Ticker?").strip()
    url = "http://www.bloomberg.com/quote/" + ticker + ":US"
    # The file imports urllib2 (not urllib), so the request goes through it.
    htmlfile = urllib2.urlopen(url)
    try:
        htmltext = htmlfile.read()
    finally:
        # Release the connection even if reading the body fails.
        htmlfile.close()
    # Split the downloaded page source itself around the marker.
    return htmltext.partition("securityType")
# Script entry point: call scrape() (which prompts for a ticker) and print its result.
print scrape()

I would appreciate any help with how best to use regex or beautifulsoup to get this information.

Thank you

The data looks like JSON. You should use a JSON parser.

This is one way to do it using regex. You can store your data as a string in the variable data.

import re
data = ''  # your data as a string

# Matches a quoted exchange name that starts with "NASDAQ " and continues
# through the following "price" field, up to and including the comma after
# the decimal price.
#   - No backslash before N: "\N" is rejected as a bad escape by the re
#     module on Python 3.6+.
#   - \S+? is lazy so the match stops at the first decimal number rather
#     than running on to a later one in the same whitespace-free span.
#   - \d+ accepts prices with any number of decimal digits (40.6, 40.65, ...).
pattern = r'"NASDAQ\s\w+\S+?\.\d+,'
x = re.findall(pattern, str(data))

Output:

['"NASDAQ GS","price":40.6,']

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM