Hello World

1 minute read

Hello world!

This is my first post!

I am just going to create a word cloud of the ‘Hello world’ sentence in several different languages. The translations will be scraped from ReversoContext. The goal of this post is just to say hello, so I won't describe the methods used any further. I hope the blog will be at least a bit enjoyable 😀

import re
import requests

import lxml
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
from wordcloud import WordCloud
# Target languages to translate into. Each entry triggers one HTTP request
# when the translations are collected, so every language is listed only once.
langs = [
    "German",
    "Spanish",
    "French",
    "Dutch",
    "Polish",
    "Portuguese",
    "Romanian",
    "Russian",
    "Turkish",
    "Italian",
]
def request(user_l, trans_l, word, timeout=10):
    """Fetch the ReversoContext translation page for *word*.

    Args:
        user_l: Source language name (e.g. "english"); lower-cased for the URL.
        trans_l: Target language name; lower-cased for the URL.
        word: Word or phrase to translate; lower-cased for the URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Returns:
        The ``requests.Response`` of the GET request. The HTTP status code is
        not checked here.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    url = f"https://context.reverso.net/translation/{user_l.lower()}-{trans_l.lower()}/{word.lower()}"
    # Browser-like User-Agent; presumably the site rejects the default
    # python-requests agent -- TODO confirm.
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0"
    }
    return requests.get(url, headers=headers, timeout=timeout)
def get_trans(url, trans_l):
    """Scrape the translations for one language from a fetched page.

    Args:
        url: Despite its name, the HTTP response object for the page; only
            its ``content`` attribute is read.
        trans_l: Target language name. Not used by this function.

    Returns:
        A lazy, single-pass iterator of cleaned translation strings.
    """
    soup = BeautifulSoup(url.content, "lxml")
    # Every element that has a CSS class starting with "translation".
    nodes = soup.find_all(class_=re.compile("^translation"))

    # The first two matches are skipped -- presumably page chrome rather than
    # real translations; verify against the page markup.
    # NOTE: str.strip() treats its argument as a set of characters, not as a
    # regex, so this trims any of "(", ")", "|", ",", space, CR and LF.
    cleaned = [node.text.strip("(\r\n|\n) ,") for node in nodes[2:]]

    # Drop entries that still contain the English words.
    has_english = re.compile(r"(world|hello)", re.IGNORECASE).search
    foreign_only = (text for text in cleaned if not has_english(text))

    # Collapse "a, b" into "a b".
    return map(lambda text: text.replace(", ", " "), foreign_only)
def get_all(user_l, word):
    """Collect the translations of *word* for every language in ``langs``.

    Args:
        user_l: Source language name, passed through to ``request``.
        word: Word or phrase to translate, passed through to ``request``.

    Returns:
        A dict mapping each translation (plus the literal "Hello world") to
        the frequency 1, which is the input shape the word cloud needs.
    """
    d = {"Hello world": 1}
    for lang in langs:
        response = request(user_l, lang, word)
        for translation in get_trans(response, lang):
            d[translation] = 1
    # That's quite a weird translation, so drop it. pop() with a default
    # avoids a KeyError when the scraped page does not contain this entry.
    d.pop("Hallo-Welt-Programm", None)
    return d
# Scrape every target language once and keep the resulting frequency dict.
translations = get_all(user_l="english", word="Hello world")
def plot_cloud(wordcloud, figsize=(15, 15)):
    """Draw *wordcloud* on a new matplotlib figure with the axes hidden.

    Args:
        wordcloud: The image-like object to display; it is passed straight to
            ``plt.imshow``.
        figsize: Figure size as ``(width, height)``, forwarded to
            ``plt.figure``. Defaults to the original fixed size of (15, 15).
    """
    plt.figure(figsize=figsize)
    plt.imshow(wordcloud)
    # Axis ticks and frame carry no meaning for an image, so hide them.
    plt.axis("off")
# Configure the cloud first, then fill it from the frequency dict.
# random_state is fixed so the layout is the same on every run.
cloud_builder = WordCloud(mode="RGBA", width=3000, height=2000, random_state=1)
wordcloud = cloud_builder.generate_from_frequencies(translations)
plot_cloud(wordcloud)

worldcloud

Seems like the work is done. Non-Latin languages like Arabic or Japanese are not included, because they are a bit tricky for the word cloud library. So I guess I can say hello world with a clear conscience 🤖