OpenAI - Test Classification Prompts#

In this notebook we are going to use the Amazon Review Dataset to test Promptmeteo in the sentiment analysis task

1. Data Preparation - EN - Build sentiment dataset.#

The dataset contains reviews from Amazon in English collected between November 1, 2015 and November 1, 2019. Each record in the dataset contains the review text, the review title, the star rating, an anonymized reviewer ID, an anonymized product ID and the coarse-grained product category (e.g. ‘books’, ‘appliances’, etc.). The corpus is balanced across stars, so each star rating constitutes 20% of the reviews in each language.

[26]:
import polars as pl
import sys

sys.path.append("..")

data = pl.read_parquet("../data/amazon_reviews_en/amazon_reviews_multi-test.parquet")
sql = pl.SQLContext()
sql.register("data", data)

sentiment_data = (
    sql.execute("""
    SELECT
        review_body as REVIEW,
        CASE
            WHEN stars=1 THEN 'negative'
            WHEN stars=3 THEN 'neutral'
            WHEN stars=5 THEN 'positive'
            ELSE null
        END AS TARGET,
    FROM data
    WHERE stars!=2 AND stars!=4;
    """)
    .collect()
    .sample(fraction=1.0, shuffle=True, seed=0)
)

train_reviews = sentiment_data.head(100).select("REVIEW").to_series().to_list()
train_targets = sentiment_data.head(100).select("TARGET").to_series().to_list()

test_reviews = sentiment_data.tail(100).select("REVIEW").to_series().to_list()
test_targets = sentiment_data.tail(100).select("TARGET").to_series().to_list()

sentiment_data.head()
[26]:
shape: (5, 2)
REVIEWTARGET
strstr
"I reuse my Nes…"positive"
"Fits great kin…"positive"
"Movie freezes …"negative"
"This is my thi…"positive"
"For the money,…"neutral"
[3]:
token = "sk-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"

2. EN - Sin entrenamiento#

Prueba 1#

[4]:
prompt = """
TEMPLATE:
    "I need you to help me with a text classification task.
    {__PROMPT_DOMAIN__}
    {__PROMPT_LABELS__}

    {__CHAIN_THOUGHT__}
    {__ANSWER_FORMAT__}"


PROMPT_DOMAIN:
    "The texts you will be processing are from the {__DOMAIN__} domain."


PROMPT_LABELS:
    "I want you to classify the texts into one of the following categories:
    {__LABELS__}."


PROMPT_DETAIL:
    ""


CHAIN_THOUGHT:
    "Please provide a step-by-step argument for your answer, explain why you
    believe your final choice is justified, and make sure to conclude your
    explanation with the name of the class you have selected as the correct
    one, in lowercase and without punctuation."


ANSWER_FORMAT:
    "In your response, include only the name of the class as a single word, in
    lowercase, without punctuation, and without adding any other statements or
    words."
"""
[9]:
import seaborn as sns
from sklearn.metrics import confusion_matrix
from promptmeteo import DocumentClassifier

model = DocumentClassifier(
    language="en",
    model_name="text-davinci-003",
    model_provider_name="openai",
    model_provider_token=token,
    prompt_domain="product reviews",
    prompt_labels=["positive", "negative", "neutral"],
    selector_k=0,
)

model.task.prompt.read_prompt(prompt)

pred_targets = model.predict(test_reviews)
pred_targets = [pred if len(pred) == 1 else [""] for pred in pred_targets]

sns.heatmap(confusion_matrix(test_targets, pred_targets), annot=True, cmap="Blues")
[9]:
<Axes: >
../_images/notebooks_05_test_openai_classification_prompts_12_1.png

Prueba 2#

[10]:
prompt = """
TEMPLATE:
    "I need you to help me with a text classification task.
    {__PROMPT_DOMAIN__}
    {__PROMPT_LABELS__}

    {__CHAIN_THOUGHT__}
    {__ANSWER_FORMAT__}"


PROMPT_DOMAIN:
    "The texts you will be processing are from the {__DOMAIN__} domain."


PROMPT_LABELS:
    "I want you to classify the texts into one of the following categories:
    {__LABELS__}."


PROMPT_DETAIL:
    ""


CHAIN_THOUGHT:
    "Think step by step you answer."


ANSWER_FORMAT:
    "In your response, include only the name of the class predicted."
"""
[12]:
import seaborn as sns
from sklearn.metrics import confusion_matrix
from promptmeteo import DocumentClassifier

model = DocumentClassifier(
    language="en",
    model_name="text-davinci-003",
    model_provider_name="openai",
    model_provider_token=token,
    prompt_domain="product reviews",
    prompt_labels=["positive", "negative", "neutral"],
    selector_k=0,
)

model.task.prompt.read_prompt(prompt)

pred_targets = model.predict(test_reviews)

sns.heatmap(confusion_matrix(test_targets, pred_targets), annot=True, cmap="Blues")
[12]:
<Axes: >
../_images/notebooks_05_test_openai_classification_prompts_15_1.png

3. EN - Con entrenamiento#

Prueba 1#

[14]:
prompt = """
TEMPLATE:
    "I need you to help me with a text classification task.
    {__PROMPT_DOMAIN__}
    {__PROMPT_LABELS__}

    {__CHAIN_THOUGHT__}
    {__ANSWER_FORMAT__}"


PROMPT_DOMAIN:
    "The texts you will be processing are from the {__DOMAIN__} domain."


PROMPT_LABELS:
    "I want you to classify the texts into one of the following categories:
    {__LABELS__}."


PROMPT_DETAIL:
    ""


CHAIN_THOUGHT:
    "Please provide a step-by-step argument for your answer, explain why you
    believe your final choice is justified, and make sure to conclude your
    explanation with the name of the class you have selected as the correct
    one, in lowercase and without punctuation."


ANSWER_FORMAT:
    "In your response, include only the name of the class as a single word, in
    lowercase, without punctuation, and without adding any other statements or
    words."
"""
[15]:
import seaborn as sns
from sklearn.metrics import confusion_matrix
from promptmeteo import DocumentClassifier

model = DocumentClassifier(
    language="en",
    model_name="text-davinci-003",
    model_provider_name="openai",
    model_provider_token=token,
    selector_k=10,
)

model.task.prompt.read_prompt(prompt)

model.train(examples=train_reviews, annotations=train_targets)

pred_targets = model.predict(test_reviews)

sns.heatmap(confusion_matrix(test_targets, pred_targets), annot=True, cmap="Blues")
[15]:
<Axes: >
../_images/notebooks_05_test_openai_classification_prompts_19_1.png

Prueba 2#

[27]:
prompt = """
TEMPLATE:
    "I need you to help me with a text classification task.
    {__PROMPT_DOMAIN__}
    {__PROMPT_LABELS__}

    {__CHAIN_THOUGHT__}
    {__ANSWER_FORMAT__}"


PROMPT_DOMAIN:
    "The texts you will be processing are from the {__DOMAIN__} domain."


PROMPT_LABELS:
    "I want you to classify the texts into one of the following categories:
    {__LABELS__}."


PROMPT_DETAIL:
    ""


CHAIN_THOUGHT:
    "Think step by step you answer."


ANSWER_FORMAT:
    "In your response, include only the name of the class predicted."
"""
[28]:
import seaborn as sns
from sklearn.metrics import confusion_matrix
from promptmeteo import DocumentClassifier

model = DocumentClassifier(
    language="en",
    model_name="text-davinci-003",
    model_provider_name="openai",
    model_provider_token=token,
    prompt_domain="product reviews",
    prompt_labels=["positive", "negative", "neutral"],
    selector_k=20,
)

model.task.prompt.read_prompt(prompt)

model.train(
    examples=train_reviews,
    annotations=train_targets,
)

pred_targets = model.predict(test_reviews)

sns.heatmap(confusion_matrix(test_targets, pred_targets), annot=True, cmap="Blues")
[28]:
<Axes: >
../_images/notebooks_05_test_openai_classification_prompts_22_1.png
[ ]:

Prueba 3#

[19]:
prompt = """
TEMPLATE:
    "I need you to help me with a text classification task.
    {__PROMPT_DOMAIN__}
    {__PROMPT_LABELS__}

    {__CHAIN_THOUGHT__}
    {__ANSWER_FORMAT__}"

PROMPT_DOMAIN:
    ""

PROMPT_LABELS:
    ""

PROMPT_DETAIL:
    ""

CHAIN_THOUGHT:
    ""

ANSWER_FORMAT:
    ""
"""
[20]:
import seaborn as sns
from sklearn.metrics import confusion_matrix
from promptmeteo import DocumentClassifier

model = DocumentClassifier(
    language="en",
    model_name="text-davinci-003",
    model_provider_name="openai",
    model_provider_token=token,
    selector_k=20,
)

model.task.prompt.read_prompt(prompt)

model.train(
    examples=train_reviews,
    annotations=train_targets,
)

pred_targets = model.predict(test_reviews)

sns.heatmap(confusion_matrix(test_targets, pred_targets), annot=True, cmap="Blues")
[20]:
<Axes: >
../_images/notebooks_05_test_openai_classification_prompts_27_1.png

4. Data Preparation - SP - Build sentiment dataset.#

[29]:
import polars as pl

data = pl.read_parquet("../data/amazon_reviews_sp/amazon_reviews_multi-test.parquet")
data.head()

sql = pl.SQLContext()
sql.register("data", data)

sentiment_data = (
    sql.execute("""
    SELECT
        review_body as REVIEW,
        CASE
            WHEN stars=1 THEN 'negative'
            WHEN stars=3 THEN 'neutral'
            WHEN stars=5 THEN 'positive'
            ELSE null
        END AS TARGET,
    FROM data
    WHERE stars!=2 AND stars!=4;
    """)
    .collect()
    .sample(fraction=1.0, shuffle=True, seed=0)
)
/
train_reviews = sentiment_data.head(100).select("REVIEW").to_series().to_list()
train_targets = sentiment_data.head(100).select("TARGET").to_series().to_list()

test_reviews = sentiment_data.tail(100).select("REVIEW").to_series().to_list()
test_targets = sentiment_data.tail(100).select("TARGET").to_series().to_list()

sentiment_data.head()
[29]:
shape: (5, 2)
REVIEWTARGET
strstr
"El filtro de d…"positive"
"Un poquito esc…"positive"
"Para qué decir…"negative"
"Mi hija esta e…"positive"
"Se podría mejo…"neutral"

5 SP - Sin Entrenamiento#

Prueba 1#

[49]:
prompt = """
TEMPLATE:
    "Necesito que me ayudes en una tarea de clasificación de texto.
    {__PROMPT_DOMAIN__}
    {__PROMPT_LABELS__}

    {__CHAIN_THOUGHT__}
    {__ANSWER_FORMAT__}"


PROMPT_DOMAIN:
    "Los textos que vas procesar del ambito de {__DOMAIN__}."


PROMPT_LABELS:
    "Quiero que me clasifiques los textos una de las siguientes categorías:
    {__LABELS__}."


PROMPT_DETAIL:
    ""


CHAIN_THOUGHT:
    "Por favor argumenta tu respuesta paso a paso, explica por qué crees que
    está justificada tu elección final, y asegúrate de que acabas tu
    explicación con el nombre de la clase que has escogido como la
    correcta, en minúscula y sin puntuación."


ANSWER_FORMAT:
    "En tu respuesta incluye sólo el nombre de la clase, como una única
    palabra, en minúscula, sin puntuación, y sin añadir ninguna otra
    afirmación o palabra."
"""
[33]:
import seaborn as sns
from sklearn.metrics import confusion_matrix
from promptmeteo import DocumentClassifier

model = DocumentClassifier(
    language="es",
    model_name="text-davinci-003",
    model_provider_name="openai",
    model_provider_token=token,
    prompt_domain="opiniones de productos",
    prompt_labels=["positiva", "negativa", "neutral"],
    selector_k=0,
)

model.task.prompt.read_prompt(prompt)

pred_targets = model.predict(test_reviews)
pred_targets = [pred if len(pred) == 1 else [""] for pred in pred_targets]

sns.heatmap(confusion_matrix(test_targets, pred_targets), annot=True, cmap="Blues")
[33]:
<Axes: >
../_images/notebooks_05_test_openai_classification_prompts_38_1.png

Prueba 2#

[44]:
prompt = """
TEMPLATE:
    "Necesito que me ayudes en una tarea de clasificación de texto.
    {__PROMPT_DOMAIN__}
    {__PROMPT_LABELS__}

    {__CHAIN_THOUGHT__}
    {__ANSWER_FORMAT__}"


PROMPT_DOMAIN:
    "Los textos que vas procesar del ambito de {__DOMAIN__}."


PROMPT_LABELS:
    "Quiero que me clasifiques los textos una de las siguientes categorías:
    {__LABELS__}."


PROMPT_DETAIL:
    ""


CHAIN_THOUGHT:
    "Argumenta tu respuesta paso a paso."


ANSWER_FORMAT:
    "En tu respuesta incluye sólo el nombre de la clase, como una única
    respuesta"
"""
[35]:
import seaborn as sns
from sklearn.metrics import confusion_matrix
from promptmeteo import DocumentClassifier

model = DocumentClassifier(
    language="es",
    model_name="text-davinci-003",
    model_provider_name="openai",
    model_provider_token=token,
    prompt_domain="opiniones de productos",
    prompt_labels=["positiva", "negativa", "neutra"],
    selector_k=0,
)

model.task.prompt.read_prompt(prompt)

pred_targets = model.predict(test_reviews)

sns.heatmap(confusion_matrix(test_targets, pred_targets), annot=True, cmap="Blues")
[35]:
<Axes: >
../_images/notebooks_05_test_openai_classification_prompts_41_1.png

Prueba 3#

[36]:
prompt = """
TEMPLATE:
    "Necesito que me ayudes en una tarea de clasificación de texto.
    {__PROMPT_DOMAIN__}
    {__PROMPT_LABELS__}

    {__CHAIN_THOUGHT__}
    {__ANSWER_FORMAT__}"


PROMPT_DOMAIN:
    ""


PROMPT_LABELS:
    "Quiero que me clasifiques los textos una de las siguientes categorías:
    {__LABELS__}."


PROMPT_DETAIL:
    ""


CHAIN_THOUGHT:
    ""


ANSWER_FORMAT:
    ""
"""
[38]:
import seaborn as sns
from sklearn.metrics import confusion_matrix
from promptmeteo import DocumentClassifier

model = DocumentClassifier(
    language="es",
    model_name="text-davinci-003",
    model_provider_name="openai",
    model_provider_token=token,
    prompt_domain="opiniones de productos",
    prompt_labels=["positiva", "negativa", "neutra"],
    selector_k=0,
)

model.task.prompt.read_prompt(prompt)

pred_targets = model.predict(test_reviews)
pred_targets = [pred if len(pred) == 1 else [""] for pred in pred_targets]

sns.heatmap(confusion_matrix(test_targets, pred_targets), annot=True, cmap="Blues")
[38]:
<Axes: >
../_images/notebooks_05_test_openai_classification_prompts_44_1.png

ES - Con entrenamiento#

Prueba 1#

[39]:
prompt = """
TEMPLATE:
    "Necesito que me ayudes en una tarea de clasificación de texto.
    {__PROMPT_DOMAIN__}
    {__PROMPT_LABELS__}

    {__CHAIN_THOUGHT__}
    {__ANSWER_FORMAT__}"


PROMPT_DOMAIN:
    "Los textos que vas procesar del ambito de {__DOMAIN__}."


PROMPT_LABELS:
    "Quiero que me clasifiques los textos una de las siguientes categorías:
    {__LABELS__}."


PROMPT_DETAIL:
    ""


CHAIN_THOUGHT:
    "Por favor argumenta tu respuesta paso a paso, explica por qué crees que
    está justificada tu elección final, y asegúrate de que acabas tu
    explicación con el nombre de la clase que has escogido como la
    correcta, en minúscula y sin puntuación."


ANSWER_FORMAT:
    "En tu respuesta incluye sólo el nombre de la clase, como una única
    palabra, en minúscula, sin puntuación, y sin añadir ninguna otra
    afirmación o palabra."
"""
[41]:
import seaborn as sns
from sklearn.metrics import confusion_matrix
from promptmeteo import DocumentClassifier

model = DocumentClassifier(
    language="es",
    model_name="text-davinci-003",
    model_provider_name="openai",
    model_provider_token=token,
    prompt_domain="opiniones de productos",
    prompt_labels=["positiva", "negativa", "neutra"],
    selector_k=10,
)

model.task.prompt.read_prompt(prompt)

model.train(
    examples=train_reviews,
    annotations=train_targets,
)

pred_targets = model.predict(test_reviews)
pred_targets = [pred if len(pred) == 1 else [""] for pred in pred_targets]

sns.heatmap(confusion_matrix(test_targets, pred_targets), annot=True, cmap="Blues")
[41]:
<Axes: >
../_images/notebooks_05_test_openai_classification_prompts_48_1.png

Prueba 2#

[45]:
prompt = """
TEMPLATE:
    "Necesito que me ayudes en una tarea de clasificación de texto.
    {__PROMPT_DOMAIN__}
    {__PROMPT_LABELS__}

    {__CHAIN_THOUGHT__}
    {__ANSWER_FORMAT__}"


PROMPT_DOMAIN:
    "Los textos que vas procesar del ambito de {__DOMAIN__}."


PROMPT_LABELS:
    "Quiero que me clasifiques los textos una de las siguientes categorías:
    {__LABELS__}."


PROMPT_DETAIL:
    ""


CHAIN_THOUGHT:
    "Argumenta tu respuesta paso a paso."


ANSWER_FORMAT:
    "En tu respuesta incluye sólo el nombre de la clase, como una única
    respuesta"
"""
[46]:
import seaborn as sns
from sklearn.metrics import confusion_matrix
from promptmeteo import DocumentClassifier

model = DocumentClassifier(
    language="es",
    model_name="text-davinci-003",
    model_provider_name="openai",
    model_provider_token=token,
    prompt_domain="opiniones de productos",
    prompt_labels=["positiva", "negativa", "neutra"],
    selector_k=10,
)

model.task.prompt.read_prompt(prompt)

model.train(
    examples=train_reviews,
    annotations=train_targets,
)

pred_targets = model.predict(test_reviews)
pred_targets = [pred if len(pred) == 1 else [""] for pred in pred_targets]

sns.heatmap(confusion_matrix(test_targets, pred_targets), annot=True, cmap="Blues")
[46]:
<Axes: >
../_images/notebooks_05_test_openai_classification_prompts_51_1.png
[48]:
pred_targets
[48]:
[['neutra'],
 [''],
 [''],
 [''],
 ['positiva'],
 [''],
 [''],
 ['neutra'],
 [''],
 [''],
 [''],
 [''],
 ['neutra'],
 [''],
 [''],
 [''],
 ['neutra'],
 [''],
 [''],
 [''],
 ['neutra'],
 ['neutra'],
 [''],
 [''],
 [''],
 ['neutra'],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 ['neutra'],
 [''],
 ['neutra'],
 [''],
 [''],
 ['neutra'],
 [''],
 ['neutra'],
 [''],
 [''],
 [''],
 [''],
 ['neutra'],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 ['neutra'],
 ['neutra'],
 [''],
 ['neutra'],
 ['neutra'],
 ['neutra'],
 [''],
 [''],
 ['neutra'],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 ['neutra'],
 [''],
 [''],
 ['neutra'],
 [''],
 [''],
 [''],
 [''],
 [''],
 ['neutra'],
 [''],
 [''],
 [''],
 [''],
 ['neutra'],
 ['neutra'],
 [''],
 [''],
 ['neutra'],
 [''],
 ['neutra']]

Prueba 3#

[50]:
prompt = """
TEMPLATE:
    "Necesito que me ayudes en una tarea de clasificación de texto.
    {__PROMPT_DOMAIN__}
    {__PROMPT_LABELS__}

    {__CHAIN_THOUGHT__}
    {__ANSWER_FORMAT__}"


PROMPT_DOMAIN:
    ""


PROMPT_LABELS:
    ""


PROMPT_DETAIL:
    ""


CHAIN_THOUGHT:
    ""


ANSWER_FORMAT:
    ""
"""
[52]:
import seaborn as sns
from sklearn.metrics import confusion_matrix
from promptmeteo import DocumentClassifier

model = DocumentClassifier(
    language="es",
    model_name="text-davinci-003",
    model_provider_name="openai",
    model_provider_token=token,
    prompt_domain="opiniones de productos",
    prompt_labels=["positiva", "negativa", "neutra"],
    selector_k=20,
)

model.task.prompt.read_prompt(prompt)

model.train(
    examples=train_reviews,
    annotations=train_targets,
)

pred_targets = model.predict(test_reviews)
pred_targets = [pred if len(pred) == 1 else [""] for pred in pred_targets]

sns.heatmap(confusion_matrix(test_targets, pred_targets), annot=True, cmap="Blues")
[52]:
<Axes: >
../_images/notebooks_05_test_openai_classification_prompts_55_1.png
[53]:
pred_targets
[53]:
[['neutra'],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 ['neutra'],
 [''],
 [''],
 [''],
 ['neutra'],
 [''],
 [''],
 [''],
 ['neutra'],
 ['neutra'],
 [''],
 ['neutra'],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 ['neutra'],
 [''],
 ['neutra'],
 ['neutra'],
 [''],
 [''],
 [''],
 ['neutra'],
 [''],
 [''],
 ['neutra'],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 ['neutra'],
 [''],
 ['neutra'],
 [''],
 [''],
 ['neutra'],
 ['neutra'],
 ['neutra'],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 [''],
 ['neutra'],
 [''],
 [''],
 ['neutra'],
 [''],
 [''],
 [''],
 [''],
 [''],
 ['neutra'],
 [''],
 [''],
 [''],
 ['neutra'],
 [''],
 ['neutra'],
 [''],
 [''],
 ['neutra'],
 [''],
 ['neutra']]

Conclusiones#

  • Parece que con el modelo Flan-t5-small, el mejor resultado se obtiene añadiendo más ejemplot y quitando la instrucción del prompt

  • Parece que hay mucho errores asociados con que la respuesta tenga un espacio antes de laa respuesta