"""Run plagiarism detection on any piece of text and report the result as a percentage.

Before running, make sure you get your own SERPAPI API key and store it in a .env file.
"""



from import search_with_serper_api
import re
from typing import List
import time 

def search_chunk(chunk) -> bool:
    """Search the internet for a given chunk of text.

    The chunk is wrapped in double quotes (presumably so the search engine
    treats it as an exact phrase) and passed to ``search_with_serper_api``,
    which uses SerperAPI to access Google's engines.

    Args:
        chunk: A string representing the text chunk to search for.

    Returns:
        True if the search returned at least one result, False otherwise.
        False is also returned for an empty or whitespace-only chunk and
        whenever the search itself fails.
    """
    # An empty query cannot match anything meaningful; skip the API call.
    if not chunk or not chunk.strip():
        return False
    try:
        search_results = search_with_serper_api(f"\"{chunk}\"")
        return len(search_results) > 0
    except Exception as e:
        # Best-effort lookup: a failed search is reported and counted as
        # "not found" so one bad request does not abort the whole run.
        print(f"An error occurred: {e}")
        return False

def chunk_text(text, chunk_by) -> List[str]:
    """Split the input text into chunks at the requested granularity.

    Args:
        text: The input text to be chunked.
        chunk_by: The granularity for chunking ('sentence' or 'paragraph').

    Returns:
        A list of strings, where each string is a non-empty,
        whitespace-stripped chunk of the original text.

    Raises:
        ValueError: If chunk_by is neither 'sentence' nor 'paragraph'.
    """
    if chunk_by == "sentence":
        # Split on '.', '?' or '!' only when no digit sits directly before
        # or after it, so decimals such as "3.14" stay in one piece.
        # NOTE: a sentence ending in a digit ("... in 2020. Next") is
        # therefore not split at that point either.
        pieces = re.split(r'(?<!\d)[.?!](?!\d)', text)
    elif chunk_by == "paragraph":
        # Every line break starts a new paragraph.
        pieces = text.split("\n")
    else:
        raise ValueError("Invalid chunk_by value. Choose 'sentence' or 'paragraph'.")

    # Drop pieces that are empty once surrounding whitespace is removed.
    return [piece.strip() for piece in pieces if piece.strip()]

def calculate_plagiarism_score(text, chunk_by) -> float:
    """Calculate the plagiarism score of a text.

    The text is split with ``chunk_text`` and every chunk is looked up with
    ``search_chunk``; the score is the share of chunks that were found.

    Args:
        text: The input text to check for plagiarism.
        chunk_by: The granularity for chunking the text
            ('sentence' or 'paragraph').

    Returns:
        The plagiarism score as a float, representing the percentage of
        plagiarised content. 0.0 is returned when the text yields no chunks.
    """
    chunks = chunk_text(text, chunk_by)
    if not chunks:
        # Nothing to check; also avoids dividing by zero below.
        return 0.0

    plagiarised_chunks = sum(1 for chunk in chunks if search_chunk(chunk))
    return (plagiarised_chunks / len(chunks)) * 100

# Script section: score the sample text and report how long the check took.

# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments the way a time.time() difference can.
start_time = time.perf_counter()

text = """ INPUT_TEXT """  # The input text to check

chunk_by = "sentence"  # "sentence" or "paragraph"
plagiarism_score = calculate_plagiarism_score(text, chunk_by)

end_time = time.perf_counter()
runtime = end_time - start_time  # Elapsed seconds for the whole check

print(f"Plagiarism Score: {plagiarism_score}%")
print(f"Runtime: {runtime} seconds")


