OpenBB/website/utils/generate_seo_for_documentation.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import yaml\n",
    "from collections import OrderedDict\n",
    "import openai\n",
    "import instructor\n",
    "from typing import List\n",
    "from pydantic import BaseModel, Field\n",
    "import os\n",
    "import fileinput\n",
    "import re\n",
    "\n",
    "openai.api_key = \"<YOUR_API_KEY>\""
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generate SEO for each documentation page"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "def parse_front_matter(file_path):\n",
    "    with open(file_path, 'r') as file:\n",
    "        lines = file.readlines()\n",
    "\n",
    "    front_matter = []\n",
    "    read_front_matter = False\n",
    "\n",
    "    for line in lines:\n",
    "        if line.strip() == '---':\n",
    "            if read_front_matter:\n",
    "                break\n",
    "            else:\n",
    "                read_front_matter = True\n",
    "        elif read_front_matter:\n",
    "            # Replace tabs with spaces\n",
    "            line = line.replace('\\t', '    ')\n",
    "            front_matter.append(line)\n",
    "\n",
    "    front_matter = \"\\n\".join(front_matter)\n",
    "    data = yaml.safe_load(front_matter)\n",
    "\n",
    "    return data\n",
    "\n",
    "def represent_ordereddict(dumper, data):\n",
    "    return dumper.represent_mapping(\n",
    "        yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG,\n",
    "        data.items()\n",
    "    )\n",
    "\n",
    "yaml.add_representer(OrderedDict, represent_ordereddict)\n",
    "\n",
    "# Define pydantic model to be output from OpenAI when doing a list of keywords\n",
    "class Keyword(BaseModel):\n",
    "    \"\"\"Keyword for documentation page SEO\"\"\"\n",
    "    keyword: str = Field(..., description=\"Keyword for SEO\")\n",
    "\n",
    "class DescriptionAndKeywordsForSEO(BaseModel):\n",
    "    \"\"\"Description and list of keywords for documentation page to improve SEO\"\"\"\n",
    "    keywords: List[Keyword]\n",
    "    description: str = Field(..., description=\"Small description to be used for SEO\")\n",
    "\n",
    "instructor.patch()\n",
    "\n",
    "for product in [\"../content/pro\"]:\n",
    "    for root, dirs, files in os.walk(product):\n",
    "        for file in files:\n",
    "            if file.endswith(\".md\"):\n",
    "                filename = os.path.join(root, file)\n",
    "                with open(filename, 'r') as f:\n",
    "                    lines = f.readlines()\n",
    "                \n",
    "                # Find the second occurrence of '---\\n'\n",
    "                front_matter_end_index = lines.index('---\\n', 1)\n",
    "                # Get the content after the front matter\n",
    "                content = ''.join(lines[front_matter_end_index+1:])\n",
    "                # Get the metadata from the front matter\n",
    "                metadata = parse_front_matter(filename)\n",
    "\n",
    "                # Print the filename so we know which file we are processing\n",
    "                print(filename)\n",
    "\n",
    "                # Check if the metadata contains the required fields\n",
    "                if 'title' not in metadata:\n",
    "                    print(f\"    title not found in {filename}\")\n",
    "                    print(\"    TITLE MISSING FOR THIS PAGE!\")\n",
    "                if 'sidebar_position' not in metadata:\n",
    "                    # Reference documentation doesn't has sidebar missing\n",
    "                    if 'reference' not in filename:\n",
    "                        print(f\"    sidebar_position not found in {filename}\")\n",
    "                        print(\"    SIDEBAR MISSING FOR THIS PAGE!\")\n",
    "\n",
    "                success = True\n",
    "                try:\n",
    "                    # Use OpenAI to get an optimize description and keywords for SEO\n",
    "                    response = openai.ChatCompletion.create(\n",
    "                        model=\"gpt-4\",\n",
    "                        response_model=DescriptionAndKeywordsForSEO,\n",
    "                        max_retries=2,\n",
    "                        messages=[\n",
    "                            {   \"role\": \"system\", \n",
    "                                \"content\": \"You are an expert with 20+ years in marketing and SEO. You are required to work on metadata for each docusaurus page to improve SEO based on content written.\"\n",
    "                            },\n",
    "                            {\n",
    "                                \"role\": \"user\", \n",
    "                                \"content\": f\"Return a list of keywords and a small description for a marketing website page, taking into account the content of this page: {content}.\"\n",
    "                            },\n",
    "                        ]\n",
    "                    )\n",
    "                except Exception as e:\n",
    "                    success = False\n",
    "                    print(\"    ERROR WITH OPENAI API\")\n",
    "\n",
    "                # Write the new file\n",
    "                with open(filename, 'w') as f:\n",
    "                    f.write('---\\n')\n",
    "\n",
    "                    if success:\n",
    "                        # Format the description for Docusaurus\n",
    "                        metadata['description'] = response.description.replace('\\n', ' ')\n",
    "                        # Format the keywords for Docusaurus, as a list of words\n",
    "                        metadata['keywords'] = [item.keyword for item in response.keywords]\n",
    "                    else:\n",
    "                        metadata['description'] = \"\"\n",
    "                        metadata['keywords'] = []\n",
    "\n",
    "                    # Reorder the metadata dictionary\n",
    "                    ordered_metadata = OrderedDict()\n",
    "                    if \"title\" in metadata:\n",
    "                        ordered_metadata['title'] = metadata['title']\n",
    "                    if \"sidebar_position\" in metadata:\n",
    "                        ordered_metadata['sidebar_position'] = metadata['sidebar_position']\n",
    "                    ordered_metadata['description'] = metadata['description'].replace('\\n', ' ')\n",
    "                    ordered_metadata['keywords'] = metadata['keywords']\n",
    "\n",
    "                    # Write the metadata to the file\n",
    "                    yaml.dump(ordered_metadata, f, default_flow_style=False)\n",
    "\n",
    "                    # Write the end of the front matter\n",
    "                    f.write('---\\n')\n",
    "\n",
    "                    # Write the HeadTitle component if it is not the index.md file\n",
    "                    '''\n",
    "                    if \"index.md\" not in filename.split('/'):\n",
    "                        f.write(f\"\"\"\n",
    "import HeadTitle from '@site/src/components/General/HeadTitle.tsx';\n",
    "\n",
    "<HeadTitle title=\"{metadata['title']} - {' - '.join([word.title().replace('-', ' ') for word in filename.split('/')[::-1][1:]])} | OpenBB Docs\" />\n",
    "\"\"\")\n",
    "                    '''\n",
    "                    # Write the content after the front matter\n",
    "                    f.write(content)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Update HeadTitle based on product"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "## FOR SDK\n",
    "\n",
    "# # Regular expression pattern to match the line\n",
    "# pattern = r'<HeadTitle title=\"(.+?) - (.+?) - Reference \\| OpenBB SDK Docs\" />'\n",
    "# #pattern = r'<HeadTitle title=\"(.+?) - (.+?) - (.+?) - Reference \\| OpenBB SDK Docs\" />'\n",
    "# #pattern = r'<HeadTitle title=\"(.+?) - (.+?) - (.+?) - (.+?) - Reference \\| OpenBB SDK Docs\" />'\n",
    "\n",
    "# # Function to generate the replacement string\n",
    "# def replacement(match):\n",
    "#     # Convert the captured groups to lowercase and form the replacement string\n",
    "#     return '<HeadTitle title=\"{}.{} - Reference | OpenBB SDK Docs\" />'.format(match.group(2).lower(), match.group(1).lower())\n",
    "#     #return '<HeadTitle title=\"{}.{}.{} - Reference | OpenBB SDK Docs\" />'.format(match.group(3).lower(), match.group(2).lower(), match.group(1).lower())\n",
    "#     #return '<HeadTitle title=\"{}.{}.{}.{} - Reference | OpenBB SDK Docs\" />'.format(match.group(4).lower(), match.group(3).lower(), match.group(2).lower(), match.group(1).lower())\n",
    "\n",
    "\n",
    "# FOR PLATFORM\n",
    "\n",
    "# # Regular expression pattern to match the line\n",
    "# #pattern = r'<HeadTitle title=\"(.+?) - (.+?) - Reference \\| OpenBB Platform Docs\" />'\n",
    "# pattern = r'<HeadTitle title=\"(.+?) - (.+?) - (.+?) - Reference \\| OpenBB Platform Docs\" />'\n",
    "# #pattern = r'<HeadTitle title=\"(.+?) - (.+?) - (.+?) - (.+?) - Reference \\| OpenBB Platform Docs\" />'\n",
    "\n",
    "# Function to generate the replacement string\n",
    "# def replacement(match):\n",
    "#     # Convert the captured groups to lowercase and form the replacement string\n",
    "#     #return '<HeadTitle title=\"{}.{} - Reference | OpenBB Platform Docs\" />'.format(match.group(2).lower(), match.group(1).lower())\n",
    "#     return '<HeadTitle title=\"{}.{}.{} - Reference | OpenBB Platform Docs\" />'.format(match.group(3).lower(), match.group(2).lower(), match.group(1).lower())\n",
    "#     #return '<HeadTitle title=\"{}.{}.{}.{} - Reference | OpenBB Platform Docs\" />'.format(match.group(4).lower(), match.group(3).lower(), match.group(2).lower(), match.group(1).lower())\n",
    "\n",
    "\n",
    "# FOR TERMINAL\n",
    "\n",
    "# # Regular expression pattern to match the line\n",
    "# #pattern = r'<HeadTitle title=\"(.+?) - (.+?) - Reference \\| OpenBB Terminal Docs\" />'\n",
    "# #pattern = r'<HeadTitle title=\"(.+?) - (.+?) - (.+?) - Reference \\| OpenBB Terminal Docs\" />'\n",
    "# pattern = r'<HeadTitle title=\"(.+?) - (.+?) - (.+?) - (.+?) - Reference \\| OpenBB Terminal Docs\" />'\n",
    "\n",
    "# # Function to generate the replacement string\n",
    "# def replacement(match):\n",
    "#     # Convert the captured groups to lowercase and form the replacement string\n",
    "#     #return '<HeadTitle title=\"{}/{} - Reference | OpenBB Terminal Docs\" />'.format(match.group(2).lower(), match.group(1).lower())\n",
    "#     #return '<HeadTitle title=\"{}/{}/{} - Reference | OpenBB Terminal Docs\" />'.format(match.group(3).lower(), match.group(2).lower(), match.group(1).lower())\n",
    "#     return '<HeadTitle title=\"{}/{}/{}/{} - Reference | OpenBB SDK Docs\" />'.format(match.group(4).lower(), match.group(3).lower(), match.group(2).lower(), match.group(1).lower())\n",
    "\n",
    "# BOT DISCORD\n",
    "\n",
    "# # Regular expression pattern to match the line\n",
    "# pattern = r'<HeadTitle title=\"(.+?) - (.+?) - Discord - Reference \\| OpenBB Bot Docs\" />'\n",
    "\n",
    "# # Function to generate the replacement string\n",
    "# def replacement(match):\n",
    "#     # Convert the captured groups to lowercase and form the replacement string\n",
    "#     return '<HeadTitle title=\"{}: {} - Discord Reference | OpenBB Bot Docs\" />'.format(match.group(2).lower(), match.group(1).lower())\n",
    "\n",
    "# BOT TELEGRAM\n",
    "\n",
    "# Regular expression pattern to match the line\n",
    "pattern = r'<HeadTitle title=\"(.+?) - (.+?) - Telegram - Reference \\| OpenBB Bot Docs\" />'\n",
    "\n",
    "# Function to generate the replacement string\n",
    "def replacement(match):\n",
    "    # Convert the captured groups to lowercase and form the replacement string\n",
    "    return '<HeadTitle title=\"{}: {} - Telegram Reference | OpenBB Bot Docs\" />'.format(match.group(2).lower(), match.group(1).lower())\n",
    "\n",
    "\n",
    "# Walk through current directory\n",
    "for dirpath, dirs, files in os.walk('bot/reference/telegram'):\n",
    "    for filename in files:\n",
    "        filepath = os.path.join(dirpath, filename)\n",
    "        # Check if file is a .md file\n",
    "        if filepath.endswith('.md'):\n",
    "            # Read the file\n",
    "            with fileinput.FileInput(filepath, inplace=True) as file:\n",
    "                for line in file:\n",
    "                    # Replace the line using regular expression\n",
    "                    print(re.sub(pattern, replacement, line), end='')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}