Story of Ready-to-drink Tea in Indonesia

Data Visualization of the Registered Ready-to-Drink Tea Dataset in Indonesia

Photo by Crystal de Passillé-Chabot on Unsplash
# Import libraries
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import itertools
import warnings
# Silence library warnings so they do not clutter the output.
warnings.filterwarnings("ignore")
import io
import base64
from matplotlib import rc,animation
from mpl_toolkits.mplot3d import Axes3D
import os
import re

# Load the tea dataset; the path is relative to the working directory.
dataset = pd.read_csv("dataset_bpom_tea.csv")
# Preview the first 10 rows (only rendered when this is the last expression
# of a notebook cell; it has no effect in a plain script).
dataset.head(10)

Distribution of Tea Drink Packages

# Count products per package type. The explicit axis/column names reproduce
# the legacy layout ("index" = package type, "jenis_kemasan" = product count)
# regardless of the installed pandas version, whose default names changed
# in pandas 2.0.
dataset_kemasan = (
    dataset["jenis_kemasan"]
    .value_counts()
    .rename_axis("index")
    .reset_index(name="jenis_kemasan")
)
# Keep only package types that are used by at least two products.
dataset_kemasan = dataset_kemasan[dataset_kemasan["jenis_kemasan"] >= 2]
# value_counts() sorts by count in descending order, so the first 15 rows
# are the 15 most common package types.
top_packages = dataset_kemasan.head(15)

plt.figure(figsize=(10, 8))
ax = sns.barplot(
    y=top_packages["index"],
    x=top_packages["jenis_kemasan"],
    palette="husl",
    linewidth=1,
    # A single colour code: multi-letter strings such as "k" * 15 are not
    # accepted as a colour by current Matplotlib releases.
    edgecolor="k",
)
sns.set_style("white")
sns.despine()
plt.xlabel("Number of product")
plt.ylabel("Package Type")
plt.grid(True)
plt.title("Distribution of Tea Drink Packages", color="b")
# Write each bar's product count next to the start of the bar.
for row, count in enumerate(top_packages["jenis_kemasan"].astype(str)):
    ax.text(.7, row, count, fontsize=9, color="black")
plt.show()

Distribution of Tea Servings

# Collect every integer that appears in the package-size strings.
# Missing values are dropped first because re.findall() only accepts strings.
serving_sizes = [
    int(number)
    for size_text in dataset["ukuran_kemasan"].dropna().astype(str)
    for number in re.findall(r"[0-9]+", size_text)
]
# Bucket the sizes into four ranges and count how many values fall in each.
# Values outside (0, 2000] match no interval and are therefore not counted.
dataset_servings = pd.DataFrame({"value": serving_sizes})
bins = pd.IntervalIndex.from_tuples([(0, 200), (200, 400), (400, 600), (600, 2000)])
dataset_servings = pd.cut(dataset_servings["value"], bins).value_counts()
ax = dataset_servings.plot.bar(color="maroon", alpha=0.7)
sns.set_style("white")
sns.despine()
plt.show()

Distribution of Flavours

# Work on a copy so the flag columns are not added to the shared dataset.
dataset_flavour = dataset.copy()

# Flag column -> substrings whose presence in the product name sets the flag.
# Matching is case-sensitive; several keywords keep their trailing space
# exactly as written in the original analysis.
flavour_keywords = {
    "is_jasmine": ("Melati", "Jasmine"),
    "is_black": ("Hitam", "Black "),
    "is_green": ("Hijau", "Green "),
    "is_white": ("Putih", "White "),
    "is_honey": ("Madu", "Honey "),
    "is_milk": ("Susu", "Milk "),
    "is_flavoured": ("Rasa",),
    "is_oolong": ("Oolong",),
}
# Cast to str so a missing product name yields 0 instead of raising TypeError.
product_names = dataset_flavour["nama_produk"].astype(str)
for flag_column, keywords in flavour_keywords.items():
    dataset_flavour[flag_column] = [
        1 if any(keyword in name for keyword in keywords) else 0
        for name in product_names
    ]

# Jasmine pie chart
labels = ["Jasmine Tea", "Non-Jasmine Tea"]
# The flag is always 0 or 1, so its sum is the number of jasmine products
# and the remainder of the rows are non-jasmine.
jasmine_total = int(dataset_flavour["is_jasmine"].sum())
sizes = [jasmine_total, len(dataset_flavour) - jasmine_total]
fig, ax = plt.subplots()
ax.pie(sizes, labels=labels, autopct="%1.1f%%", colors=["yellow", "azure"])
ax.axis("equal")  # Equal aspect ratio ensures the pie chart is circular
ax.set_title("Jasmine Tea Product")
plt.show()

Import vs Local Brands

# Work on a copy so the flag column is not added to the shared dataset.
dataset_brand = dataset.copy()
# 1 when the registration number contains the literal text "MD", else 0.
# NOTE(review): presumably "MD" marks domestically produced products --
# confirm against the registration-number scheme.
# Cast to str so a missing registration number yields 0 instead of raising.
dataset_brand["is_local"] = (
    dataset_brand["nomor_registrasi"]
    .astype(str)
    .str.contains("MD", regex=False)
    .astype(int)
)

Manufacturer Location

# Select manufacturer and its location: one row per distinct
# (city, producer) pair.
dataset_loc = dataset[["kota_produsen", "produsen"]].drop_duplicates().reset_index()
# Count producers per city. The explicit axis/column names reproduce the
# legacy layout ("index" = city, "kota_produsen" = producer count)
# regardless of the installed pandas version.
loc_count = (
    dataset_loc["kota_produsen"]
    .value_counts()
    .rename_axis("index")
    .reset_index(name="kota_produsen")
)
print("Tea drink manufacturers are distributed in %d cities." % dataset_loc["kota_produsen"].unique().size)
# value_counts() sorts by count in descending order, so the first 15 rows
# are the 15 cities with the most producers.
top_cities = loc_count.head(15)

# Create graph
plt.figure(figsize=(10, 8))
ax = sns.barplot(
    y=top_cities["index"],
    x=top_cities["kota_produsen"],
    palette="husl",
    linewidth=1,
    # A single colour code: multi-letter strings such as "k" * 15 are not
    # accepted as a colour by current Matplotlib releases.
    edgecolor="k",
)
sns.set_style("white")
sns.despine()
plt.xlabel("Number of companies")
plt.ylabel("City")
plt.grid(True)
plt.title("Distribution of Producer Location (City)", color="b", fontsize=18)
# Write each bar's producer count next to the start of the bar.
for row, count in enumerate(top_cities["kota_produsen"].astype(str)):
    ax.text(.7, row, count, fontsize=9, color="black")
plt.show()

List of Companies

# Distinct producer names, in order of first appearance.
dataset_corp = dataset["produsen"].unique()
print("Number of companies:", dataset_corp.size)
# Show the array (only rendered when this is the last expression of a
# notebook cell; it has no effect in a plain script).
dataset_corp

Company Product Lines

# One row per distinct (producer, brand) pair, so counting producers below
# gives the number of brands ("product lines") per producer.
dataset_cline = dataset[["produsen", "merk"]].drop_duplicates()
# The explicit axis/column names reproduce the legacy layout
# ("index" = producer, "produsen" = product-line count) regardless of the
# installed pandas version.
cline_count = (
    dataset_cline["produsen"]
    .value_counts()
    .rename_axis("index")
    .reset_index(name="produsen")
)
# value_counts() sorts by count in descending order, so the first 15 rows
# are the 15 producers with the most product lines.
top_companies = cline_count.head(15)

# Create graph
plt.figure(figsize=(10, 8))
ax = sns.barplot(
    y=top_companies["index"],
    x=top_companies["produsen"],
    palette="husl",
    linewidth=1,
    # A single colour code: multi-letter strings such as "k" * 15 are not
    # accepted as a colour by current Matplotlib releases.
    edgecolor="k",
)
plt.xlabel("Number of product line")
plt.ylabel("Company")
plt.grid(True)
plt.title("Companies with Most Product Lines", color="maroon", fontsize=18)
# Write each bar's product-line count next to the start of the bar.
for row, count in enumerate(top_companies["produsen"].astype(str)):
    ax.text(0.55, row, count, fontsize=10, color="black")
plt.show()

# Histogram of how many companies have a given number of product lines.
cline_dist = cline_count["produsen"]
plt.figure(figsize=(13, 7))
# NOTE: distplot is deprecated in recent seaborn releases (histplot is the
# suggested replacement); it is kept here so the binning stays unchanged.
ax = sns.distplot(cline_dist, kde=False, color="maroon")
sns.set_style("white")
sns.despine()
# Add title and axis names
plt.title("Distribution of Companies Based on Product Lines")
plt.xlabel("Number of product lines")
plt.ylabel("Number of companies")
plt.show()

Companies Using Less Plastic

# Package types treated as non-plastic and as plastic, respectively.
non_plastics = ["Kaleng", "Karton Laminat", "Botol Kaca", "Tetrapak", "Pouch Aluminium Foil"]
plastics = ["Gelas Plastik", "Botol Plastik", "Plastik", "Gelas Plastik PET", "Botol Plastik PET"]
dataset_plastics = dataset[dataset["jenis_kemasan"].isin(plastics)]
dataset_non_plastics = dataset[dataset["jenis_kemasan"].isin(non_plastics)]
# Producers with at least one product in plastic packaging.
corp_plastics = dataset_plastics["produsen"].drop_duplicates().values
# Keep only rows whose producer never appears in the plastic list. The test
# must run on the "produsen" column: calling isin() on the whole DataFrame
# builds a cell-wise mask that blanks cells to NaN instead of dropping rows.
corp_clean = dataset_non_plastics[~dataset_non_plastics["produsen"].isin(corp_plastics)]
# Distinct producers that only use non-plastic packaging.
corp_clean = corp_clean["produsen"].drop_duplicates().reset_index()

Closing

Software developer with MBA degree, mentor, somewhat fatherly figure, data and business synergy enthusiast

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store