Files
OpenBB/website/utils/generate_seo_for_documentation.ipynb
DidierRLopes 504a4f442c Improve Pro documentation further (#5641)
* small improvement

* Fix TutorialVideo for mobile view

* main page terminal pro docs

* jose whelp

* placeholder index file that needs to be removed

* fix: fixes sidebar, removes unused index.md

* update link to api keys

* update api keys link

* fix hyperlinks in terminal helper

* fix a few more links

* fix some more links

* linter

* black linter

* improve main docs page for each product

* small refactor

* header to take to main page of product

* run OpenAI script to generate SEO for pro content

---------

Co-authored-by: jose-donato <43375532+jose-donato@users.noreply.github.com>
2023-11-03 16:46:20 +00:00

294 lines
13 KiB
Plaintext
Vendored

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import yaml\n",
"from collections import OrderedDict\n",
"import openai\n",
"import instructor\n",
"from typing import List\n",
"from pydantic import BaseModel, Field\n",
"import os\n",
"import fileinput\n",
"import re\n",
"\n",
"# Prefer the OPENAI_API_KEY environment variable so a real key never has to be\n",
"# saved in the notebook; the placeholder is only the fallback when it is unset.\n",
"openai.api_key = os.environ.get(\"OPENAI_API_KEY\", \"<YOUR_API_KEY>\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Generate SEO for each documentation page"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"def parse_front_matter(file_path):\n",
"    \"\"\"Parse the YAML front matter at the top of a markdown file.\n",
"\n",
"    The front matter is the run of lines between the first two lines that\n",
"    consist solely of '---' (ignoring surrounding whitespace). If the closing\n",
"    '---' is missing, everything after the opening one is parsed.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    file_path : str\n",
"        Path of the markdown file to read.\n",
"\n",
"    Returns\n",
"    -------\n",
"    object\n",
"        Whatever `yaml.safe_load` produces for the front matter (a dict for\n",
"        the usual `key: value` front matter), or an empty dict when the file\n",
"        has no front matter or the front matter is empty.\n",
"    \"\"\"\n",
"    front_matter = []\n",
"    read_front_matter = False\n",
"\n",
"    with open(file_path, 'r', encoding='utf-8') as file:\n",
"        for line in file:\n",
"            if line.strip() == '---':\n",
"                if read_front_matter:\n",
"                    # Second delimiter: the front matter is complete\n",
"                    break\n",
"                read_front_matter = True\n",
"            elif read_front_matter:\n",
"                # Replace tabs with spaces\n",
"                front_matter.append(line.replace('\\t', ' '))\n",
"\n",
"    # Every collected line still carries its own newline, so join without a\n",
"    # separator; joining on a newline would insert a blank line after each\n",
"    # line and alter multi-line YAML values.\n",
"    data = yaml.safe_load(\"\".join(front_matter))\n",
"\n",
"    # safe_load gives None for an empty document; return a dict instead so\n",
"    # callers can run membership tests without a None check.\n",
"    return {} if data is None else data\n",
"\n",
"def represent_ordereddict(dumper, data):\n",
"    \"\"\"Represent an OrderedDict as a plain YAML mapping.\n",
"\n",
"    The key/value pairs are handed to the dumper as `data.items()` under the\n",
"    default mapping tag.\n",
"    \"\"\"\n",
"    # NOTE(review): passing the items view rather than the mapping itself is\n",
"    # presumably what keeps the insertion order in the output -- confirm\n",
"    # against PyYAML's represent_mapping.\n",
"    mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG\n",
"    pairs = data.items()\n",
"    return dumper.represent_mapping(mapping_tag, pairs)\n",
"\n",
"# Register the representer so yaml.dump uses it for every OrderedDict\n",
"yaml.add_representer(OrderedDict, represent_ordereddict)\n",
"\n",
"# Define pydantic model to be output from OpenAI when doing a list of keywords\n",
"class Keyword(BaseModel):\n",
" \"\"\"Keyword for documentation page SEO\"\"\"\n",
" keyword: str = Field(..., description=\"Keyword for SEO\")\n",
"\n",
"class DescriptionAndKeywordsForSEO(BaseModel):\n",
" \"\"\"Description and list of keywords for documentation page to improve SEO\"\"\"\n",
" keywords: List[Keyword]\n",
" description: str = Field(..., description=\"Small description to be used for SEO\")\n",
"\n",
"# Patch the openai module. The ChatCompletion call below passes `response_model`\n",
"# and `max_retries`, which presumably rely on this patch -- TODO confirm.\n",
"instructor.patch()\n",
"\n",
"# For every markdown page under each product folder: ask OpenAI for an SEO\n",
"# description and keywords, then rewrite the page's front matter in place.\n",
"for product in [\"../content/pro\"]:\n",
"    for root, _dirs, files in os.walk(product):\n",
"        for file in files:\n",
"            if not file.endswith(\".md\"):\n",
"                continue\n",
"\n",
"            filename = os.path.join(root, file)\n",
"            with open(filename, 'r', encoding='utf-8') as f:\n",
"                lines = f.readlines()\n",
"\n",
"            # Print the filename so we know which file we are processing\n",
"            print(filename)\n",
"\n",
"            # Find the closing '---' of the front matter. The test is the same\n",
"            # one parse_front_matter uses (stripped line equals '---'), so both\n",
"            # agree on where the front matter ends.\n",
"            front_matter_end_index = next(\n",
"                (\n",
"                    index\n",
"                    for index, text in enumerate(lines)\n",
"                    if index > 0 and text.strip() == '---'\n",
"                ),\n",
"                None,\n",
"            )\n",
"            if not lines or lines[0].strip() != '---' or front_matter_end_index is None:\n",
"                # Skip the page instead of raising and aborting the whole run\n",
"                print(f\" front matter not found in {filename}\")\n",
"                print(\" FRONT MATTER MISSING FOR THIS PAGE, SKIPPING!\")\n",
"                continue\n",
"\n",
"            # Get the content after the front matter\n",
"            content = ''.join(lines[front_matter_end_index + 1:])\n",
"            # Get the metadata from the front matter (empty front matter -> {})\n",
"            metadata = parse_front_matter(filename) or {}\n",
"\n",
"            # Check if the metadata contains the required fields\n",
"            if 'title' not in metadata:\n",
"                print(f\" title not found in {filename}\")\n",
"                print(\" TITLE MISSING FOR THIS PAGE!\")\n",
"            # Pages with 'reference' in their path are not reported for a missing sidebar position\n",
"            if 'sidebar_position' not in metadata and 'reference' not in filename:\n",
"                print(f\" sidebar_position not found in {filename}\")\n",
"                print(\" SIDEBAR MISSING FOR THIS PAGE!\")\n",
"\n",
"            response = None\n",
"            try:\n",
"                # Use OpenAI to get an optimized description and keywords for SEO\n",
"                response = openai.ChatCompletion.create(\n",
"                    model=\"gpt-4\",\n",
"                    response_model=DescriptionAndKeywordsForSEO,\n",
"                    max_retries=2,\n",
"                    messages=[\n",
"                        {\n",
"                            \"role\": \"system\",\n",
"                            \"content\": \"You are an expert with 20+ years in marketing and SEO. You are required to work on metadata for each docusaurus page to improve SEO based on content written.\"\n",
"                        },\n",
"                        {\n",
"                            \"role\": \"user\",\n",
"                            \"content\": f\"Return a list of keywords and a small description for a marketing website page, taking into account the content of this page: {content}.\"\n",
"                        },\n",
"                    ]\n",
"                )\n",
"            except Exception as e:\n",
"                print(\" ERROR WITH OPENAI API\")\n",
"                # Report the cause instead of discarding it\n",
"                print(f\" {type(e).__name__}: {e}\")\n",
"\n",
"            if response is not None:\n",
"                description = response.description\n",
"                # Format the keywords for Docusaurus, as a list of words\n",
"                keywords = [item.keyword for item in response.keywords]\n",
"            else:\n",
"                # Keep the SEO metadata the page already has rather than\n",
"                # overwriting it with empty values because one request failed\n",
"                description = metadata.get('description') or \"\"\n",
"                keywords = metadata.get('keywords') or []\n",
"\n",
"            # Reorder the metadata dictionary\n",
"            ordered_metadata = OrderedDict()\n",
"            if \"title\" in metadata:\n",
"                ordered_metadata['title'] = metadata['title']\n",
"            if \"sidebar_position\" in metadata:\n",
"                ordered_metadata['sidebar_position'] = metadata['sidebar_position']\n",
"            # Format the description for Docusaurus (single line)\n",
"            ordered_metadata['description'] = str(description).replace('\\n', ' ')\n",
"            ordered_metadata['keywords'] = keywords\n",
"            # Carry over every other front matter key so reordering drops nothing\n",
"            for key, value in metadata.items():\n",
"                ordered_metadata.setdefault(key, value)\n",
"\n",
"            # Render the front matter before the file is opened for writing, so\n",
"            # a serialization error cannot leave the page truncated\n",
"            front_matter_text = yaml.dump(ordered_metadata, default_flow_style=False)\n",
"\n",
"            # Write the new file\n",
"            with open(filename, 'w', encoding='utf-8') as f:\n",
"                f.write('---\\n')\n",
"                # Write the metadata to the file\n",
"                f.write(front_matter_text)\n",
"                # Write the end of the front matter\n",
"                f.write('---\\n')\n",
"\n",
"                # Disabled: write the HeadTitle component if it is not the index.md file\n",
"                # if \"index.md\" not in filename.split('/'):\n",
"                #     f.write(f\"\"\"\n",
"                # import HeadTitle from '@site/src/components/General/HeadTitle.tsx';\n",
"                #\n",
"                # <HeadTitle title=\"{metadata['title']} - {' - '.join([word.title().replace('-', ' ') for word in filename.split('/')[::-1][1:]])} | OpenBB Docs\" />\n",
"                # \"\"\")\n",
"\n",
"                # Write the content after the front matter\n",
"                f.write(content)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Update HeadTitle based on product"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## FOR SDK\n",
"\n",
"# # Regular expression pattern to match the line\n",
"# pattern = r'<HeadTitle title=\"(.+?) - (.+?) - Reference \\| OpenBB SDK Docs\" />'\n",
"# #pattern = r'<HeadTitle title=\"(.+?) - (.+?) - (.+?) - Reference \\| OpenBB SDK Docs\" />'\n",
"# #pattern = r'<HeadTitle title=\"(.+?) - (.+?) - (.+?) - (.+?) - Reference \\| OpenBB SDK Docs\" />'\n",
"\n",
"# # Function to generate the replacement string\n",
"# def replacement(match):\n",
"# # Convert the captured groups to lowercase and form the replacement string\n",
"# return '<HeadTitle title=\"{}.{} - Reference | OpenBB SDK Docs\" />'.format(match.group(2).lower(), match.group(1).lower())\n",
"# #return '<HeadTitle title=\"{}.{}.{} - Reference | OpenBB SDK Docs\" />'.format(match.group(3).lower(), match.group(2).lower(), match.group(1).lower())\n",
"# #return '<HeadTitle title=\"{}.{}.{}.{} - Reference | OpenBB SDK Docs\" />'.format(match.group(4).lower(), match.group(3).lower(), match.group(2).lower(), match.group(1).lower())\n",
"\n",
"\n",
"# FOR PLATFORM\n",
"\n",
"# # Regular expression pattern to match the line\n",
"# #pattern = r'<HeadTitle title=\"(.+?) - (.+?) - Reference \\| OpenBB Platform Docs\" />'\n",
"# pattern = r'<HeadTitle title=\"(.+?) - (.+?) - (.+?) - Reference \\| OpenBB Platform Docs\" />'\n",
"# #pattern = r'<HeadTitle title=\"(.+?) - (.+?) - (.+?) - (.+?) - Reference \\| OpenBB Platform Docs\" />'\n",
"\n",
"# Function to generate the replacement string\n",
"# def replacement(match):\n",
"# # Convert the captured groups to lowercase and form the replacement string\n",
"# #return '<HeadTitle title=\"{}.{} - Reference | OpenBB Platform Docs\" />'.format(match.group(2).lower(), match.group(1).lower())\n",
"# return '<HeadTitle title=\"{}.{}.{} - Reference | OpenBB Platform Docs\" />'.format(match.group(3).lower(), match.group(2).lower(), match.group(1).lower())\n",
"# #return '<HeadTitle title=\"{}.{}.{}.{} - Reference | OpenBB Platform Docs\" />'.format(match.group(4).lower(), match.group(3).lower(), match.group(2).lower(), match.group(1).lower())\n",
"\n",
"\n",
"# FOR TERMINAL\n",
"\n",
"# # Regular expression pattern to match the line\n",
"# #pattern = r'<HeadTitle title=\"(.+?) - (.+?) - Reference \\| OpenBB Terminal Docs\" />'\n",
"# #pattern = r'<HeadTitle title=\"(.+?) - (.+?) - (.+?) - Reference \\| OpenBB Terminal Docs\" />'\n",
"# pattern = r'<HeadTitle title=\"(.+?) - (.+?) - (.+?) - (.+?) - Reference \\| OpenBB Terminal Docs\" />'\n",
"\n",
"# # Function to generate the replacement string\n",
"# def replacement(match):\n",
"# # Convert the captured groups to lowercase and form the replacement string\n",
"# #return '<HeadTitle title=\"{}/{} - Reference | OpenBB Terminal Docs\" />'.format(match.group(2).lower(), match.group(1).lower())\n",
"# #return '<HeadTitle title=\"{}/{}/{} - Reference | OpenBB Terminal Docs\" />'.format(match.group(3).lower(), match.group(2).lower(), match.group(1).lower())\n",
"# return '<HeadTitle title=\"{}/{}/{}/{} - Reference | OpenBB Terminal Docs\" />'.format(match.group(4).lower(), match.group(3).lower(), match.group(2).lower(), match.group(1).lower())\n",
"\n",
"# BOT DISCORD\n",
"\n",
"# # Regular expression pattern to match the line\n",
"# pattern = r'<HeadTitle title=\"(.+?) - (.+?) - Discord - Reference \\| OpenBB Bot Docs\" />'\n",
"\n",
"# # Function to generate the replacement string\n",
"# def replacement(match):\n",
"# # Convert the captured groups to lowercase and form the replacement string\n",
"# return '<HeadTitle title=\"{}: {} - Discord Reference | OpenBB Bot Docs\" />'.format(match.group(2).lower(), match.group(1).lower())\n",
"\n",
"# BOT TELEGRAM\n",
"\n",
"# Matches a Telegram reference HeadTitle line; group 1 and group 2 are the two\n",
"# ' - '-separated parts that precede ' - Telegram - Reference'\n",
"pattern = r'<HeadTitle title=\"(.+?) - (.+?) - Telegram - Reference \\| OpenBB Bot Docs\" />'\n",
"\n",
"def replacement(match):\n",
"    \"\"\"Build the new HeadTitle line for a `pattern` match.\n",
"\n",
"    Both captured groups are lower-cased and emitted in swapped order, as\n",
"    '<group 2>: <group 1> - Telegram Reference'.\n",
"    \"\"\"\n",
"    first = match.group(1).lower()\n",
"    second = match.group(2).lower()\n",
"    return f'<HeadTitle title=\"{second}: {first} - Telegram Reference | OpenBB Bot Docs\" />'\n",
"\n",
"\n",
"# Rewrite every markdown file found under bot/reference/telegram in place\n",
"for folder, _subfolders, names in os.walk('bot/reference/telegram'):\n",
"    for name in names:\n",
"        md_path = os.path.join(folder, name)\n",
"        # Only .md files are touched\n",
"        if not md_path.endswith('.md'):\n",
"            continue\n",
"        # With inplace=True, whatever is printed inside the loop is written\n",
"        # back to the file being read\n",
"        with fileinput.FileInput(md_path, inplace=True) as stream:\n",
"            for text in stream:\n",
"                # Lines that do not match the pattern are echoed unchanged\n",
"                print(re.sub(pattern, replacement, text), end='')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}