Plagiarism Checker
🔑 ID:
34617
👨💻
Python
🕒
17/03/2024
Free
Description:
This code runs plagiarism detection on any piece of text you give it and reports the result as a percentage. Before running it, make sure you get your own Serper API key and put it in a .env file like this:
SERPER_API_KEY = "YOUR_API_KEY"
Code:
"""Estimate how much of a text can be found verbatim online.

The text is split into sentences or paragraphs, each chunk is looked up as an
exact-phrase web search, and the share of chunks that return at least one
result is reported as a percentage.
"""

import re
import time
from typing import List

# A '.', '?' or '!' that is neither preceded nor followed by a digit, so
# decimal numbers such as "3.14" are not split. Compiled once at import time.
_SENTENCE_BOUNDARY = re.compile(r'(?<!\d)[.?!](?!\d)')


def search_chunk(chunk: str) -> bool:
    """
    Searches for a given chunk of text on the internet through SimplerLLM's
    `search_with_serper_api` helper.

    Parameters:
    - chunk: A string representing the text chunk to search for. It is wrapped
      in double quotes so the query asks for the exact phrase.

    Returns:
    - True if the search returned at least one result; False if it returned
      nothing or if the search raised an error (the error is printed).
    """
    # Imported here rather than at module level so the pure text helpers in
    # this file stay importable without the third-party package installed.
    # Deliberately outside the ``try``: a missing dependency should fail
    # loudly instead of silently reporting every chunk as "not found".
    from SimplerLLM.tools.serp import search_with_serper_api

    try:
        search_results = search_with_serper_api(f"\"{chunk}\"")
    except Exception as e:
        # Best effort: a failed lookup counts as "not found" so that one bad
        # request does not abort the whole run.
        print(f"An error occurred: {e}")
        return False
    # Truthiness covers both an empty result list and a ``None`` result.
    return bool(search_results)


def chunk_text(text: str, chunk_by: str) -> List[str]:
    """
    Splits the input text into chunks based on the specified granularity.

    Parameters:
    - text: The input text to be chunked.
    - chunk_by: The granularity for chunking ('sentence' or 'paragraph').
      Sentences end at '.', '?' or '!' not adjacent to a digit; paragraphs
      are separated by newlines.

    Returns:
    - A list of stripped, non-empty chunks in their original order.

    Raises:
    - ValueError: If chunk_by is neither 'sentence' nor 'paragraph'.
    """
    if chunk_by == "sentence":
        pieces = _SENTENCE_BOUNDARY.split(text)
    elif chunk_by == "paragraph":
        pieces = text.split("\n")
    else:
        raise ValueError("Invalid chunk_by value. Choose 'sentence' or 'paragraph'.")
    # Strip once, then drop pieces that were empty or whitespace-only.
    return [piece for piece in map(str.strip, pieces) if piece]


def calculate_plagiarism_score(text: str, chunk_by: str) -> float:
    """
    Calculates the plagiarism score of a given text by chunking it and
    searching for each chunk online.

    Parameters:
    - text: The input text to check for plagiarism.
    - chunk_by: The granularity for chunking the text ('sentence' or 'paragraph').

    Returns:
    - The percentage (0.0 to 100.0) of chunks for which the search returned a
      result. Text that yields no chunks scores 0.0.

    Raises:
    - ValueError: If chunk_by is neither 'sentence' nor 'paragraph'.
    """
    chunks = chunk_text(text, chunk_by)
    if not chunks:
        # Nothing to search for; also avoids dividing by zero.
        return 0.0
    plagiarised_chunks = sum(1 for chunk in chunks if search_chunk(chunk))
    return (plagiarised_chunks / len(chunks)) * 100


def main() -> None:
    """Score the placeholder input text and print the score and the runtime."""
    # perf_counter is a monotonic clock, so the measured duration cannot be
    # skewed by system clock adjustments.
    start_time = time.perf_counter()

    text = """ INPUT_TEXT """  # The Input Text
    chunk_by = "sentence"  # "sentence" or "paragraph"

    plagiarism_score = calculate_plagiarism_score(text, chunk_by)

    runtime = time.perf_counter() - start_time

    print(f"Plagiarism Score: {plagiarism_score}%")
    print(f"Runtime: {runtime} seconds")


# Run the check only when executed as a script, so importing this module does
# not trigger web searches.
if __name__ == "__main__":
    main()
GitHub Link
✖️ Not Available
Download File
✖️ Not Available
If you’re encountering any problems or need further assistance with this code, we’re here to help! Join our community on the forum or Discord for support, tips, and discussion.