from bs4 import BeautifulSoup
import pandas as pd


def strip(string):
    """Return the whitespace-stripped text of every tag in *string*.

    *string* is an iterable of BeautifulSoup tags (name kept for
    backward compatibility with existing callers).
    """
    return [s.text.strip() for s in string]


def pad_list(lst, length, pad_value='Unknown'):
    """Right-pad *lst* with *pad_value* until it has *length* items.

    If *lst* already has *length* or more items it is returned unchanged
    (the multiplier is negative or zero, so nothing is appended).
    """
    return lst + [pad_value] * (length - len(lst))


def main():
    """Scrape company listings from a local HTML file and write a CSV."""
    # Replace the path below with the path to your local HTML file.
    with open('/Users/sbzuc0a/Downloads/Scrape/index.html', 'r',
              encoding='utf-8') as html_file:
        content = html_file.read()

    soup = BeautifulSoup(content, 'html.parser')

    companies = strip(soup.find_all('h3', class_='media__body__head'))
    locations = strip(soup.find_all('h5'))
    places = strip(soup.find_all('span', class_='link-fix--text'))
    # .get('href') returns None when the anchor has no href attribute.
    urls = [a.get('href')
            for a in soup.find_all('a', class_='media-module__link')]

    # The four selectors can match different numbers of elements, but
    # pd.DataFrame requires equal-length columns.  The original code
    # padded only `locations` (to len(companies)), which still raised
    # ValueError whenever `places` or `urls` came up short — pad every
    # list to the longest one instead.
    width = max(len(companies), len(locations), len(places), len(urls))
    companies = pad_list(companies, width)
    locations = pad_list(locations, width)
    places = pad_list(places, width)
    urls = pad_list(urls, width)

    df = pd.DataFrame({
        'Company': companies,
        'Location': locations,
        'Place': places,
        'URL': urls,
    })

    print(df)

    # Save the scraped data alongside the script.
    df.to_csv('output_bandage.csv', index=False)


if __name__ == '__main__':
    main()