feat: add initial People Playground mod development kit
parsing_docs/extract_sidebar_links.py (Executable file, 43 lines added)
@@ -0,0 +1,43 @@
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.11"
# dependencies = [
#     "bs4",
#     "requests",
# ]
# ///

from bs4 import BeautifulSoup
import sys
import requests
import os


def download_page(url, filename):
    """Download HTML page and save to local file"""
    if os.path.exists(filename):
        print(f"Using cached {filename}")
        return open(filename).read()

    print(f"Downloading {url}...")
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(response.text)
        return response.text
    except requests.RequestException as e:
        print(f"Error downloading {url}: {e}")
        sys.exit(1)


# Download the main index page
index_url = 'https://wiki.studiominus.nl/index.html'
html = download_page(index_url, 'index.html')
soup = BeautifulSoup(html, 'html.parser')

with open('sidebar.txt', 'w') as f:
    for link in soup.select('.sidebar-links a[href]'):
        url = f"https://wiki.studiominus.nl{link['href']}"
        title = link.get_text().strip()
        f.write(f'{url} - {title}\n')
        if 'texturePackSystem.html' in url:
            break
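Both extractor scripts emit one plain-text line per link in the form "url - title", and extract_sidebar_links.py stops once it has written the texturePackSystem.html entry, the last sidebar page of interest. Downstream, process_wiki_urls.py recovers the pair by splitting on the first " - ". A minimal sketch of reading that format back (not part of the commit, assuming the separator never appears inside a URL):

# Sketch only: parse the "url - title" lines the extractors emit.
with open('sidebar.txt') as f:
    for line in f:
        line = line.strip()
        if line and ' - ' in line:
            url, title = line.split(' - ', 1)
            print(url, '->', title)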
parsing_docs/extract_wiki_links.py (Executable file, 41 lines added)
@@ -0,0 +1,41 @@
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.11"
# dependencies = [
#     "bs4",
#     "requests",
# ]
# ///

from bs4 import BeautifulSoup
import sys
import requests
import os


def download_page(url, filename):
    """Download HTML page and save to local file"""
    if os.path.exists(filename):
        print(f"Using cached {filename}")
        return open(filename).read()

    print(f"Downloading {url}...")
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(response.text)
        return response.text
    except requests.RequestException as e:
        print(f"Error downloading {url}: {e}")
        sys.exit(1)


# Download the internal reference page
ref_url = 'https://wiki.studiominus.nl/internalReference.html'
html = download_page(ref_url, 'internalReference.html')
soup = BeautifulSoup(html, 'html.parser')

with open('source_documentation.txt', 'w') as f:
    for link in soup.select('li p a[href^="/internalReference/"]'):
        url = f"https://wiki.studiominus.nl{link['href']}"
        title = link.get_text().strip()
        f.write(f'{url} - {title}\n')
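download_page() in both extractors reuses a cached copy of the HTML when the target file already exists, so repeated runs never re-fetch the wiki. If a fresh fetch is needed, the cached files can simply be removed first; a hypothetical helper, not part of the commit:

# Hypothetical: clear the cached HTML so download_page() fetches again.
import os

for cached in ('index.html', 'internalReference.html'):
    if os.path.exists(cached):
        os.remove(cached)
        print(f"Removed cached {cached}")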
parsing_docs/index.html (Normal file, 382 lines added)
File diff suppressed because one or more lines are too long
parsing_docs/internalReference.html (Normal file, 2376 lines added)
File diff suppressed because one or more lines are too long
parsing_docs/process_wiki_urls.py (Executable file, 303 lines added)
@@ -0,0 +1,303 @@
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.11"
# dependencies = [
#     "playwright",
# ]
# ///

"""
Script to extract content from People Playground wiki pages.
Processes URLs from sidebar.txt and source_documentation.txt (or a file passed
via --input) and saves the page content to text files.
"""

import os
import re
import time
import argparse
from playwright.sync_api import sync_playwright


def get_category_from_url(url):
    """Extract category from wiki URL and return folder name."""
    match = re.search(r'https://wiki\.studiominus\.nl/([^/]+)/', url)
    if match:
        category = match.group(1)
        # Map wiki categories to folder names
        folder_map = {
            'internalReference': 'internalReference',
            'tutorials': 'tutorials',
            'snippets': 'snippets',
            'details': 'details'
        }
        return folder_map.get(category, '')
    return ''


def create_safe_filename(title, url):
    """Create a safe filename from title and URL."""
    # Remove the common prefix from the title
    clean_title = title.replace('People Playground Modding - ', '')

    # If title is empty or just the prefix, use the URL path
    if not clean_title or clean_title == title:
        path_match = re.search(r'/([^/]+)\.html$', url)
        if path_match:
            clean_title = path_match.group(1)
        else:
            clean_title = 'page'

    # Replace spaces and special characters with underscores
    safe_name = re.sub(r'[^a-z0-9]+', '_', clean_title.lower())
    safe_name = re.sub(r'^_+|_+$', '', safe_name)
    safe_name = re.sub(r'_+', '_', safe_name)

    return f"{safe_name}.txt"


def is_file_already_extracted(filename, output_dir, expected_url):
    """Check if a file has already been extracted and contains the expected URL."""
    filepath = os.path.join(output_dir, filename)
    if not os.path.exists(filepath):
        return False

    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            first_line = f.readline().strip()
            # Check if the first line contains the expected URL
            return first_line == f"URL: {expected_url}"
    except (IOError, UnicodeDecodeError):
        return False


def extract_content_from_page(page):
    """Extract main content from the current page."""
    return page.evaluate("""() => {
        const mainContent = document.querySelector('.page#markdown-result');

        if (mainContent) {
            // Clone the main content to avoid modifying the original
            const contentClone = mainContent.cloneNode(true);

            // Remove any script elements from the clone
            const scripts = contentClone.querySelectorAll('script');
            scripts.forEach(script => script.remove());

            // Remove any style elements from the clone
            const styles = contentClone.querySelectorAll('style');
            styles.forEach(style => style.remove());

            // Remove the obsolete message if it exists
            const obsoleteMsg = contentClone.querySelector('.obsolete-message');
            if (obsoleteMsg) {
                obsoleteMsg.remove();
            }

            return {
                title: document.title,
                content: contentClone.innerText.trim(),
                html: contentClone.innerHTML.trim()
            };
        } else {
            return {
                title: document.title,
                content: "Main content area not found",
                html: ""
            };
        }
    }""")


def process_wiki_urls(input_file=None, summary_name='summary.txt'):
    """Process all wiki URLs and extract content."""
    lines = []

    if input_file:
        # Use specified input file
        input_path = input_file
        if os.path.exists(input_path):
            with open(input_path, 'r') as f:
                lines = f.readlines()
            print(f"Processing URLs from: {input_path}")
        else:
            print(f"Error: Specified input file '{input_path}' not found!")
            return
    else:
        # Auto-detect input files
        input_files = []

        # Check for sidebar.txt
        if os.path.exists('sidebar.txt'):
            input_files.append('sidebar.txt')

        # Check for source_documentation.txt
        if os.path.exists('source_documentation.txt'):
            input_files.append('source_documentation.txt')

        if not input_files:
            print("No input files found! Looking for sidebar.txt or source_documentation.txt")
            print("Usage: python process_wiki_urls.py [--input <filename>]")
            return

        # Read from all found input files
        for file_path in input_files:
            print(f"Processing URLs from: {file_path}")
            with open(file_path, 'r') as f:
                lines.extend(f.readlines())

    if not lines:
        print("No URLs found in input files!")
        return

    # Parse URLs and titles
    urls_and_titles = []
    for line in lines:
        line = line.strip()
        if line and ' - ' in line:
            url, title = line.split(' - ', 1)
            urls_and_titles.append((url.strip(), title.strip()))

    # Create base output directory in parent directory
    base_output_dir = '../extracted_wiki_content'
    os.makedirs(base_output_dir, exist_ok=True)

    # Summary file goes in the same directory as this script
    script_dir = os.path.dirname(os.path.abspath(__file__))
    summary_output_dir = script_dir

    # Summary data
    summary = []

    with sync_playwright() as p:
        browser = p.chromium.launch()
        page = browser.new_page()

        try:
            for i, (url, expected_title) in enumerate(urls_and_titles, 1):
                print(f"Processing {i}/{len(urls_and_titles)}: {url}")

                try:
                    # Try to predict filename from expected title first
                    temp_filename = create_safe_filename(expected_title, url)

                    # Get category for the expected filename check
                    temp_category = get_category_from_url(url)
                    if temp_category:
                        temp_output_dir = os.path.join(base_output_dir, temp_category)
                    else:
                        temp_output_dir = base_output_dir

                    # Check if already extracted
                    if is_file_already_extracted(temp_filename, temp_output_dir, url):
                        category_path = f"{temp_category}/" if temp_category else ""
                        print(f" ✓ Skipped (already extracted to {category_path}{temp_filename})")
                        summary.append({
                            'url': url,
                            'title': expected_title,
                            'expected_title': expected_title,
                            'filename': temp_filename,
                            'content_length': 0,
                            'skipped': True
                        })
                        continue

                    # Navigate to the page
                    page.goto(url, wait_until='networkidle', timeout=30000)

                    # Extract content
                    extracted_data = extract_content_from_page(page)

                    # Create filename from actual extracted title
                    filename = create_safe_filename(extracted_data['title'], url)

                    # Get category and create appropriate folder
                    category = get_category_from_url(url)
                    if category:
                        output_dir = os.path.join(base_output_dir, category)
                        os.makedirs(output_dir, exist_ok=True)
                    else:
                        output_dir = base_output_dir

                    filepath = os.path.join(output_dir, filename)

                    # Double-check with actual filename (in case title differs from expected)
                    if filename != temp_filename and is_file_already_extracted(filename, output_dir, url):
                        print(f" ✓ Skipped (already extracted to {category}/{filename})")
                        summary.append({
                            'url': url,
                            'title': extracted_data['title'],
                            'expected_title': expected_title,
                            'filename': filename,
                            'content_length': 0,
                            'skipped': True
                        })
                        continue

                    # Save content to file
                    with open(filepath, 'w', encoding='utf-8') as f:
                        f.write(f"URL: {url}\n")
                        f.write(f"Title: {extracted_data['title']}\n")
                        f.write("=" * 50 + "\n\n")
                        f.write(extracted_data['content'])

                    # Add to summary
                    summary.append({
                        'url': url,
                        'title': extracted_data['title'],
                        'expected_title': expected_title,
                        'filename': filename,
                        'content_length': len(extracted_data['content'])
                    })

                    category_path = f"{category}/" if category else ""
                    print(f" ✓ Saved to {category_path}{filename}")

                    # Small delay to be respectful
                    time.sleep(1)

                except Exception as e:
                    print(f" ✗ Error processing {url}: {e}")
                    summary.append({
                        'url': url,
                        'title': 'ERROR',
                        'expected_title': expected_title,
                        'filename': 'ERROR',
                        'content_length': 0,
                        'error': str(e)
                    })

        finally:
            browser.close()

    # Create summary file
    summary_path = os.path.join(summary_output_dir, summary_name)
    with open(summary_path, 'w', encoding='utf-8') as f:
        f.write("People Playground Wiki Content Extraction Summary\n")
        f.write("=" * 50 + "\n\n")
        f.write(f"Total URLs processed: {len(summary)}\n")
        f.write(f"Skipped (already extracted): {len([s for s in summary if s.get('skipped', False)])}\n")
        f.write(f"Successful extractions: {len([s for s in summary if s['title'] != 'ERROR' and not s.get('skipped', False)])}\n")
        f.write(f"Failed extractions: {len([s for s in summary if s['title'] == 'ERROR'])}\n\n")

        f.write("Extracted Pages:\n")
        f.write("-" * 30 + "\n")
        for item in summary:
            if item.get('skipped', False):
                f.write(f"• SKIPPED: {item['expected_title']} ({item['filename']})\n")
            elif item['title'] != 'ERROR':
                f.write(f"• {item['expected_title']} ({item['filename']})\n")
            else:
                f.write(f"• ERROR: {item['expected_title']} - {item['error']}\n")

    skipped_count = len([s for s in summary if s.get('skipped', False)])
    extracted_count = len([s for s in summary if s['title'] != 'ERROR' and not s.get('skipped', False)])

    print(f"\nExtraction complete! Summary saved to {summary_path}")
    print(f"Total pages extracted: {extracted_count}")
    if skipped_count > 0:
        print(f"Pages skipped (already extracted): {skipped_count}")
    print(f"Failed extractions: {len([s for s in summary if s['title'] == 'ERROR'])}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Extract content from People Playground wiki pages')
    parser.add_argument('--input', '-i', help='Input file containing URLs (default: auto-detect sidebar.txt and source_documentation.txt)')
    parser.add_argument('--summary-name', '-n', default='summary.txt', help='Name for summary file (default: summary.txt)')
    args = parser.parse_args()

    process_wiki_urls(args.input, args.summary_name)
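Because the argparse block is guarded by the __name__ check, process_wiki_urls() can also be called directly from Python. A minimal sketch (hypothetical usage, not part of the commit) that re-runs the extraction for just the internal reference pages with its own summary file:

# Hypothetical usage sketch: limit extraction to one input file.
from process_wiki_urls import process_wiki_urls

process_wiki_urls(input_file='source_documentation.txt',
                  summary_name='internal_reference_summary.txt')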
parsing_docs/sidebar.txt (Normal file, 46 lines added)
@@ -0,0 +1,46 @@
https://wiki.studiominus.nl/index.html - Home
https://wiki.studiominus.nl/intro.html - Introduction
https://wiki.studiominus.nl/intro/fileStructure.html - File Structure
https://wiki.studiominus.nl/intro/boilerplate.html - Empty mod template
https://wiki.studiominus.nl/details.html - Details
https://wiki.studiominus.nl/details/layers.html - Layers
https://wiki.studiominus.nl/details/lifecycle.html - Mod Lifecycle
https://wiki.studiominus.nl/details/meta.html - Metadata
https://wiki.studiominus.nl/details/readme.html - Extended mod description
https://wiki.studiominus.nl/details/scriptFiles.html - Script Files
https://wiki.studiominus.nl/details/shadyCodeRejection.html - Shady Code Rejection
https://wiki.studiominus.nl/details/unity.html - The Unity Engine
https://wiki.studiominus.nl/details/builtInComponents.html - Built-in components
https://wiki.studiominus.nl/gameAssets.html - Game assets
https://wiki.studiominus.nl/tutorials.html - Tutorials
https://wiki.studiominus.nl/tutorials/tutorialCreatingMod.html - Creating a mod
https://wiki.studiominus.nl/tutorials/tutorialCustomItem.html - Creating a custom item
https://wiki.studiominus.nl/tutorials/tutorialCustomBehaviour.html - Creating a custom behaviour
https://wiki.studiominus.nl/tutorials/tutorialCustomSyringe.html - Creating a custom syringe
https://wiki.studiominus.nl/tutorials/tutorialCustomLiquid.html - Creating a custom liquid
https://wiki.studiominus.nl/tutorials/tutorialCustomGun.html - Creating a gun with a custom projectile
https://wiki.studiominus.nl/tutorials/tutorialCustomAttachment.html - Creating a custom weapon attachment
https://wiki.studiominus.nl/tutorials/tutorialSerialisation.html - Saving, loading, copying, and pasting
https://wiki.studiominus.nl/tutorials/tutorialMixing.html - Custom liquid mixers
https://wiki.studiominus.nl/snippets.html - Code snippets
https://wiki.studiominus.nl/snippets/cartridges.html - Cartridges
https://wiki.studiominus.nl/snippets/materials.html - Materials
https://wiki.studiominus.nl/snippets/particles.html - Particle effects
https://wiki.studiominus.nl/snippets/physicalProperties.html - Physical properties
https://wiki.studiominus.nl/snippets/spawnables.html - Spawnable items
https://wiki.studiominus.nl/snippets/activation.html - Activation action
https://wiki.studiominus.nl/snippets/assignTextures.html - Random sprite assignment
https://wiki.studiominus.nl/snippets/backgroundScriptCreation.html - Creating a background script
https://wiki.studiominus.nl/snippets/changeEnvironmentSettings.html - Change environment settings
https://wiki.studiominus.nl/snippets/createExplosion.html - Create an explosion
https://wiki.studiominus.nl/snippets/createLight.html - Creating a light
https://wiki.studiominus.nl/snippets/customHuman.html - Custom human sprite
https://wiki.studiominus.nl/snippets/debug.html - Debug drawing
https://wiki.studiominus.nl/snippets/editingExistingItems.html - Editing pre-existing items
https://wiki.studiominus.nl/snippets/entrypoint.html - Empty entry point
https://wiki.studiominus.nl/snippets/eventListening.html - Listen for events
https://wiki.studiominus.nl/snippets/mapId.html - Map IDs
https://wiki.studiominus.nl/snippets/registerItem.html - Registering an item
https://wiki.studiominus.nl/snippets/simpleGun.html - Adding a firearm
https://wiki.studiominus.nl/snippets/spawnParticle.html - Spawn particles on activation
https://wiki.studiominus.nl/snippets/texturePackSystem.html - Basic texture pack system
parsing_docs/source_documentation.txt (Normal file, 1024 lines added)
File diff suppressed because it is too large
parsing_docs/summary.txt (Normal file, 1080 lines added)
File diff suppressed because it is too large
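Taken together, the pipeline is: extract_sidebar_links.py and extract_wiki_links.py produce sidebar.txt and source_documentation.txt, then process_wiki_urls.py renders each listed page with Playwright and writes the text into ../extracted_wiki_content plus summary.txt. A hypothetical driver (not part of the commit), assuming uv is on PATH and the Playwright Chromium browser has already been installed:

# Hypothetical driver: run the three scripts in pipeline order from parsing_docs/.
import subprocess

for script in ('extract_sidebar_links.py',
               'extract_wiki_links.py',
               'process_wiki_urls.py'):
    subprocess.run(['uv', 'run', '--script', script], check=True, cwd='parsing_docs')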