In [1]:
import os
import sys

# Extend the module search path with the parent directory *before* the
# `alchemy` import below runs.
# NOTE(review): presumably `alchemy` is a project-local module living one
# level above this notebook — confirm.
sys.path.append("../")
from dotenv import load_dotenv, find_dotenv

# Load the .env file located by find_dotenv() so the os.environ lookups in
# this notebook can see its variables; the return value is bound to `_`.
_ = load_dotenv(find_dotenv())  # read local .env file
from alchemy import AlchemyAPI

# Raises KeyError if HF_API_KEY is not present in the environment.
hf_api_key = os.environ["HF_API_KEY"]

In [2]:
# Endpoint for the hosted NER model: "<HF_API_BASE>/<model id>".
# Raises KeyError if HF_API_BASE is not present in the environment.
MODEL = "dslim/bert-base-NER"
HF_API = "/".join((os.environ["HF_API_BASE"], MODEL))
HF_API

'https://api-inference.huggingface.co/models/dslim/bert-base-NER'

In [3]:
# Client object used by every cell below, built from the API key and the
# model endpoint URL.
# NOTE(review): AlchemyAPI is defined outside this notebook; presumably it
# sends text to HF_API authenticated with hf_api_key — confirm.
alchemyapi = AlchemyAPI(hf_api_key, HF_API)

In [4]:
# Sample sentence; reused later as the Gradio example input and for the
# local-pipeline call.
alchemyText = "My name is Girish, I'm learning GenerativeAI and I live in Canada and i have github repo by GirishCodeAlchemy"
# Last expression of the cell, so the raw entity list returned by the client
# is displayed as the cell output.
alchemyapi.invoke_model(alchemyText)

[{'entity_group': 'PER',
  'score': 0.9990140199661255,
  'word': 'G',
  'start': 11,
  'end': 12},
 {'entity_group': 'PER',
  'score': 0.8276980519294739,
  'word': '##iri',
  'start': 12,
  'end': 15},
 {'entity_group': 'PER',
  'score': 0.37389782071113586,
  'word': '##sh',
  'start': 15,
  'end': 17},
 {'entity_group': 'MISC',
  'score': 0.590412974357605,
  'word': 'GenerativeA',
  'start': 32,
  'end': 43},
 {'entity_group': 'LOC',
  'score': 0.999776303768158,
  'word': 'Canada',
  'start': 59,
  'end': 65},
 {'entity_group': 'ORG',
  'score': 0.7062660455703735,
  'word': 'Giri',
  'start': 92,
  'end': 96},
 {'entity_group': 'ORG',
  'score': 0.9375653266906738,
  'word': '##CodeAlchemy',
  'start': 98,
  'end': 109}]

## Gradio Interface


In [5]:
import gradio as gr


def named_entity(input):
    """Run NER on ``input`` and shape the result for ``gr.HighlightedText``.

    Returns a dict holding the original text under ``"text"`` and whatever
    ``alchemyapi.invoke_model`` returned for it under ``"entities"``.
    """
    return {"text": input, "entities": alchemyapi.invoke_model(input)}


# Text box in, highlighted text out, backed by `named_entity`.
demo = gr.Interface(
    named_entity,
    [gr.Textbox(label="Text to find entities", lines=2)],
    [gr.HighlightedText(label="Text with entities")],
    # `examples` adds ready-made inputs the user can click to try the app.
    examples=[alchemyText],
    title="NER with dslim/bert-base-NER",
    description="Find entities using the `dslim/bert-base-NER` model under the hood!",
    allow_flagging="never",
)
demo.launch()

Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.




### Adding a helper function to merge tokens


In [6]:
def merge_tokens(tokens):
    """Merge adjacent word-piece entities into whole-word entities.

    Two consecutive entries are merged when they share the same
    ``"entity_group"`` and the previous entry's ``"end"`` equals the current
    entry's ``"start"`` (no gap in the source text). On a merge the ``"##"``
    word-piece marker is stripped from the appended piece, ``"end"`` is
    extended, and ``"score"`` becomes the arithmetic mean of the scores of
    all pieces in the span.

    Args:
        tokens: Iterable of dicts with at least the keys ``"entity_group"``,
            ``"word"``, ``"start"``, ``"end"`` and ``"score"``. ``None`` or an
            empty sequence yields an empty list.

    Returns:
        A new list of new dicts; the input list and its dicts are left
        untouched.
    """
    if not tokens:
        return []

    merged_tokens = []
    # Parallel bookkeeping so the averaged score is a true mean over the
    # span rather than a pairwise running average that over-weights the
    # last pieces.
    score_sums = []
    piece_counts = []

    for token in tokens:
        previous = merged_tokens[-1] if merged_tokens else None
        continues_previous = (
            previous is not None
            and previous["entity_group"] == token["entity_group"]
            and previous["end"] == token["start"]  # Ensure continuity
        )

        if continues_previous:
            previous["word"] += token["word"].replace("##", "")
            previous["end"] = token["end"]
            score_sums[-1] += token["score"]
            piece_counts[-1] += 1
            previous["score"] = score_sums[-1] / piece_counts[-1]
        else:
            # Copy so later merges never write into the caller's dicts.
            merged_tokens.append(dict(token))
            score_sums.append(token["score"])
            piece_counts.append(1)

    return merged_tokens

In [7]:
def named_entity(input):
    """Run NER on ``input`` and return merged entities for highlighting.

    The entity list from ``alchemyapi.invoke_model`` is passed through
    ``merge_tokens`` before being returned under ``"entities"`` next to the
    original text under ``"text"``.
    """
    raw_entities = alchemyapi.invoke_model(input)
    return {"text": input, "entities": merge_tokens(raw_entities)}

In [8]:
# Stop any Gradio server that is still running before launching a new one.
gr.close_all()

# Text box in, highlighted text out, backed by the current `named_entity`.
demo = gr.Interface(
    named_entity,
    [gr.Textbox(label="Text to find entities", lines=2)],
    [gr.HighlightedText(label="Text with entities")],
    # Ready-made input the user can click to try the app.
    examples=[alchemyText],
    title="NER with dslim/bert-base-NER",
    description="Find entities using the `dslim/bert-base-NER` model under the hood!",
    allow_flagging="never",
)
demo.launch()

Closing server running on port: 7862
Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.




In [9]:
# Shut down the running Gradio server(s) now that the UI demo is finished.
gr.close_all()

Closing server running on port: 7862
Closing server running on port: 7862


# Manual model invocation (local `transformers` pipeline)


In [10]:
from transformers import pipeline

# Build a local "ner" pipeline for the same model id used above.
get_completion = pipeline("ner", model="dslim/bert-base-NER")


def ner(input):
    """Run the local pipeline on ``input``.

    Returns a dict with the original text under ``"text"`` and the
    pipeline's output under ``"entities"``.
    """
    return {"text": input, "entities": get_completion(input)}


ner(alchemyText)

2024-02-15 19:45:51.916069: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


{'text': "My name is Girish, I'm learning GenerativeAI and I live in Canada and i have github repo by GirishCodeAlchemy",
 'entities': [{'entity': 'B-PER',
   'score': 0.999014,
   'index': 4,
   'word': 'G',
   'start': 11,
   'end': 12},
  {'entity': 'B-PER',
   'score': 0.8276972,
   'index': 5,
   'word': '##iri',
   'start': 12,
   'end': 15},
  {'entity': 'B-PER',
   'score': 0.37389728,
   'index': 6,
   'word': '##sh',
   'start': 15,
   'end': 17},
  {'entity': 'B-MISC',
   'score': 0.73191583,
   'index': 12,
   'word': 'Gene',
   'start': 32,
   'end': 36},
  {'entity': 'I-MISC',
   'score': 0.56740975,
   'index': 13,
   'word': '##rative',
   'start': 36,
   'end': 42},
  {'entity': 'I-MISC',
   'score': 0.471914,
   'index': 14,
   'word': '##A',
   'start': 42,
   'end': 43},
  {'entity': 'B-LOC',
   'score': 0.9997763,
   'index': 20,
   'word': 'Canada',
   'start': 59,
   'end': 65},
  {'entity': 'B-ORG',
   'score': 0.93222725,
   'index': 30,
   'word': 'G',
   'sta