The forum is under maintenance; we will be back soon 🙂
CSV download does not include all the results in a Django application
Hi,
I'm trying to build a web app with Python and Django. In this application a user can enter a list of websites, and the results show each website's status, title, summary, email, phone number, and Facebook and Instagram links if present. At the end the user can download the results as CSV, but the CSV shows only one result, and even that one is incomplete (the website, the phone, the email, and the Facebook and Instagram links are missing). What am I doing wrong? I have attached the base.html and result.html files, and here is my views.py file:
Any idea how I can solve this?
thanks!
# website_checker/checker/views.py
import csv
import functools
import io
import re

import requests
import spacy
from bs4 import BeautifulSoup
from django.http import HttpResponseRedirect
from django.shortcuts import HttpResponse, render
from django.urls import reverse

# NOTE: the four names imported here are redefined further down in this
# module, so the local definitions are the ones that end up being used.
from .utils import (
    extract_emails,
    extract_phones,
    extract_social_media_links,
    get_business_summary,
)

# Patterns are compiled once at import time instead of on every call.
_EMAIL_RE = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')
_PHONE_RE = re.compile(
    r'(\+?\d{1,3}[-.\s]?)?(\()?\d{3}(\))?[-.\s]?\d{3}[-.\s]?\d{4}'
)
# Everything in a phone match that is neither a digit nor '+' is a separator.
_PHONE_SEPARATOR_RE = re.compile(r'[^\d+]')


@functools.lru_cache(maxsize=1)
def _load_nlp():
    """Load the spaCy 'en_core_web_sm' pipeline once and reuse it."""
    return spacy.load('en_core_web_sm')


# Function to get a business summary from the text
def get_business_summary(text):
    """Return the first sentence of *text* mentioning a business or company.

    The text is split into sentences with spaCy; the first sentence whose
    lower-cased form contains 'business' or 'company' is returned stripped
    of surrounding whitespace. An empty string is returned when no sentence
    matches.
    """
    # The pipeline is cached: loading it on every call is needlessly slow.
    doc = _load_nlp()(text)
    for sent in doc.sents:
        sentence = sent.text.strip()
        lowered = sentence.lower()
        # You can add more conditions to extract business-specific
        # information from the text
        if 'business' in lowered or 'company' in lowered:
            return sentence
    return ''


# Function to extract emails from the text
def extract_emails(text):
    """Return the distinct email addresses found in *text*.

    Duplicates are removed and the result is sorted so the output order is
    stable between calls.
    """
    return sorted(set(_EMAIL_RE.findall(text)))


# Function to extract phone numbers from the text
def extract_phones(text):
    """Return the distinct phone numbers found in *text*, separators removed.

    Each match is reduced to its digits (and a leading '+', if any). All
    separator characters the pattern accepts are removed, including tabs and
    newlines, so the same number written with different separators is
    reported once. The result is sorted. When nothing matches, the list
    ``['No phones found']`` is returned.
    """
    phones = {
        _PHONE_SEPARATOR_RE.sub('', match.group(0))
        for match in _PHONE_RE.finditer(text)
    }
    return sorted(phones) if phones else ['No phones found']


# Function to extract Facebook and Instagram links from the website
def extract_social_media_links(soup):
    """Collect Facebook and Instagram hrefs from the anchors in *soup*.

    Returns a ``(facebook_links, instagram_links)`` tuple of lists, in the
    order the anchors are found. An href containing 'facebook.com' is
    counted as Facebook only, even if it also contains 'instagram.com'.
    Duplicates are not removed here.
    """
    facebook_links = []
    instagram_links = []
    # Only anchor tags that actually carry an href attribute are considered.
    for tag in soup.find_all('a', href=True):
        href = tag['href']
        if 'facebook.com' in href:
            facebook_links.append(href)
        elif 'instagram.com' in href:
            instagram_links.append(href)
    return facebook_links, instagram_links


# Actual implementation of
# generate_csv function to convert websites_data into CSV format
_CSV_FIELDNAMES = (
    'Website',
    'Status',
    'Title',
    'Description',
    'Business Summary',
    'Emails',
    'Phones',
    'Facebook',
    'Instagram',
)

# Upper bound, in seconds, for a single website request so one unresponsive
# host cannot block the view indefinitely.
_REQUEST_TIMEOUT_SECONDS = 10


def generate_csv(websites_data):
    """Serialize website records to CSV text.

    Args:
        websites_data: iterable of dicts with the keys 'url', 'is_down',
            'title', 'description', 'business_summary', 'emails', 'phones',
            'facebook_links' and 'instagram_links'; 'status' is optional.

    Returns:
        The CSV document as a string: a header row followed by one row per
        record. List-valued fields are joined with ', '.
    """
    buffer = io.StringIO()
    writer = csv.DictWriter(buffer, fieldnames=_CSV_FIELDNAMES)
    writer.writeheader()
    for data in websites_data:
        # Export the readable UP/Down label rather than the raw is_down
        # boolean; derive it when a record does not carry 'status'.
        status = data.get('status')
        if status is None:
            status = 'Down' if data['is_down'] else 'UP'
        # Build the row explicitly so extra record keys never reach the CSV.
        writer.writerow({
            'Website': data['url'],
            'Status': status,
            'Title': data['title'],
            'Description': data['description'],
            'Business Summary': data['business_summary'],
            'Emails': ', '.join(data['emails']),
            'Phones': ', '.join(data['phones']),
            'Facebook': ', '.join(data['facebook_links']),
            'Instagram': ', '.join(data['instagram_links']),
        })
    return buffer.getvalue()


def download_csv(request):
    """Return the website records stored in the session as a CSV attachment.

    GET is accepted as well as POST: ``home`` reaches this view through a
    redirect, which the browser follows with a GET request.
    """
    if request.method not in ('GET', 'POST'):
        return HttpResponse("Invalid request method for CSV download.")

    websites_data = request.session.get('websites_data')
    if not websites_data:
        return HttpResponse("No data to download.")

    response = HttpResponse(generate_csv(websites_data), content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename="websites_data.csv"'
    return response


def _check_website(url):
    """Fetch *url* and return its result record (status, metadata, contacts)."""
    is_down = True
    title = 'No title available'
    description = 'No description available'
    business_summary = 'Unable to retrieve website content.'
    emails = []
    phones = []
    facebook_links = []
    instagram_links = []

    try:
        response = requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)
    except requests.exceptions.RequestException:
        # Unreachable or invalid URL: keep the fallback values above.
        response = None

    if response is not None:
        is_down = response.status_code != 200
        soup = BeautifulSoup(response.content, 'html.parser')

        title_tag = soup.title
        title_text = title_tag.string if title_tag is not None else None
        if title_text:
            title = title_text.strip()

        # A description meta tag without a 'content' attribute is treated
        # like a missing tag instead of raising KeyError.
        description_tag = soup.find('meta', attrs={'name': 'description'})
        content = description_tag.get('content') if description_tag else None
        if content is not None:
            description = content.strip()

        # Get the website content for NLP processing
        website_text = soup.get_text()
        business_summary = get_business_summary(website_text)
        emails = extract_emails(website_text)
        phones = extract_phones(website_text)

        # Remove duplicate links; sorting keeps the output order stable.
        facebook_links, instagram_links = extract_social_media_links(soup)
        facebook_links = sorted(set(facebook_links))
        instagram_links = sorted(set(instagram_links))

    return {
        'url': url,
        'is_down': is_down,
        'title': title,
        'description': description,
        'business_summary': business_summary,
        'emails': emails,
        'phones': phones,
        'facebook_links': facebook_links,
        'instagram_links': instagram_links,
        # 'UP' only for an HTTP 200 response, 'Down' otherwise.
        'status': 'Down' if is_down else 'UP',
    }


# Combine the check_websites logic with the home view function
def home(request):
    """Show the URL form (GET) or check the submitted websites (POST).

    On POST, each non-blank line of 'website_urls' is checked and the
    results are stored in the session under 'websites_data'. If the POST
    carries 'download_csv', the user is redirected to the 'download_csv'
    view; otherwise the result table is rendered.
    """
    # For GET request, display the form to enter website URLs
    if request.method != 'POST':
        return render(request, 'checker/home.html')

    raw_urls = request.POST.get('website_urls', '')
    # Drop blank lines and stray whitespace around each URL.
    urls_list = [line.strip() for line in raw_urls.splitlines() if line.strip()]
    print("Request Method:", request.method)  # Debugging line
    print("Website URLs:", urls_list)  # Debugging line

    websites_data = [_check_website(url) for url in urls_list]

    # Store every completed check, not only those made by a download
    # request, and never overwrite stored results with an empty list. A
    # download request that does not re-post the URLs then still exports
    # the full result set.
    if websites_data:
        request.session['websites_data'] = websites_data

    # Check if the request is for CSV download
    if request.POST.get('download_csv'):
        return HttpResponseRedirect(reverse('download_csv'))

    # For normal POST request, render the result table
    print("Websites Data:", websites_data)  # Debugging line
    # 'status' is the status of the last checked site; None when no URLs
    # were submitted (this case used to raise UnboundLocalError).
    status = websites_data[-1]['status'] if websites_data else None
    return render(
        request,
        'checker/home.html',
        {'websites_data': websites_data, 'status': status},
    )
The first thing I would do to debug this is to go to ChatGPT, give it the parameters of what you're trying to do, give it the python code and ask how you can improve the results.
Regards,
Earnie Boyd, CEO
Seasoned Solutions Advisor LLC
Schedule 1-on-1 help
Join me on Slack
-
How to add payment using "Streamlit UI"
2 weeks ago
-
Localhost Information Script
7 months ago
-
How can i remember python scripting
7 months ago
-
A cautionary tale about using Flask in debug mode with Python Threads
9 months ago
-
I don't understand the reason of error.
1 year ago