简体   繁体   中英

Remove unwanted characters from a string

from selenium import webdriver
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
import pandas as pd
from csv import writer


# Build a set of Chrome options.
# NOTE(review): `options` is never passed to webdriver.Chrome() below, so
# none of these arguments are applied to the browser that gets started.
options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920x1080")
options.add_argument("--disable-extensions")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
# Explicit-wait helper with a 10 second timeout; not used in this snippet.
wait = WebDriverWait(driver, 10)

url = "https://www.askgamblers.com/online-casinos/reviews/casino-friday"
driver.get(url)


# Parse the HTML of the loaded page.
soup = BeautifulSoup(driver.page_source, "html.parser")

# Not used in this snippet.
product = []

# Every <div id="tabPayments"> element on the page.
pays=soup.select("div#tabPayments")

for pay in pays:
    try:
        # First element matching the selector inside this tab, or None.
        t4=pay.select_one(" .review-details-wrapper+ .review-details-wrapper .review-details__item:nth-child(2) .review-details__text")
        # Keeps the child nodes themselves (not their text) whose .text is
        # non-empty, so embedded "\n" characters are left untouched.
        t4 = [i for i in t4 if i.text]
    except:
        # Any error (for example t4 being None) is silently ignored.
        pass
    
# Find the customer-support tab, then the first details text block inside it.
supports = soup.find("div", {"id": "tabCustomers"})
supports = supports.find("div", {"class": "review-details__text"})
# Drop newlines and keep the part between the first and second ":".
email = "Support Email:"+supports.text.replace("\n", "").split(":")[1]
print(email)

It shows me output like this:

['\nSupport\nEmail:\nsupport@casinofriday.com\n', '\n']

but I want output like this:

 Support Email:support@casinofriday.com 

I want to remove all unwanted characters from my string. Kindly recommend a solution. This is the page link: https://www.askgamblers.com/online-casinos/reviews/casino-friday

Full Code

from selenium import webdriver
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
import pandas as pd
from csv import writer


# Configure Chrome. The options object only takes effect when it is passed
# to webdriver.Chrome() below.
options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
# Chrome expects the window size as "width,height".
options.add_argument("--window-size=1920,1080")
options.add_argument("--disable-extensions")
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)
wait = WebDriverWait(driver, 10)

url = "https://www.askgamblers.com/online-casinos/reviews/casino-friday"

# Only the page source is needed, so close the browser as soon as the HTML
# has been parsed, even if loading the page fails.
try:
    driver.get(url)
    soup = BeautifulSoup(driver.page_source, "html.parser")
finally:
    driver.quit()

product = []

pays = soup.select("div#tabPayments")

for pay in pays:
    t4 = pay.select_one(
        " .review-details-wrapper+ .review-details-wrapper .review-details__item:nth-child(2) .review-details__text")
    if t4 is None:
        # The withdrawal-times block is missing from this tab; nothing to print.
        continue
    # The children are BeautifulSoup nodes, not plain strings: take each
    # node's text, skip whitespace-only nodes, and drop embedded newlines.
    t4 = [
        node.get_text().replace("\n", "")
        for node in t4.children
        if node.get_text().strip()
    ]
    print(t4)

supports = soup.find("div", {"id": "tabCustomers"})
if supports is not None:
    supports = supports.find("div", {"class": "review-details__text"})
if supports is None:
    raise SystemExit("Support e-mail section not found on the page")

# Keep everything after the first ":" so the value is never truncated.
_, _, address = supports.get_text().replace("\n", "").partition(":")
email = "Support Email:" + address.strip()
print(email)

Output

['EWallets:0-1 hours', 'Bank Transfers:1-7 days', 'Cheques:Not offered', 'Card Payments:1-7 days', 'Pending Time:0-24 hours']
Support Email:support@casinofriday.com

Hope this helps. Happy Coding:)

Looks like you could utilize two methods to achieve your goals: replace() and split() before appending.

You can remove any unwanted character from a string with the .replace() method, replacing the unwanted character with an empty string.
Let's say your result string is stored in the variable product. To remove the unwanted [ character, you can do this:

# Replace every "[" in the string with an empty string, i.e. delete it.
product = product.replace('[', '')

Doing that for every character you want to remove leads to code like this:

# Delete each unwanted character in turn: both square brackets, newlines,
# single quotes and commas.
product = product.replace('[', '')
product = product.replace(']', '')
product = product.replace('\n', '')
product = product.replace("'", "")
product = product.replace(",", "")
# Finally trim whitespace left at either end of the string.
product = product.strip()

The final strip() removes leading and trailing whitespace.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM