Skip to content

extract_from_source_samples_extractions

roei sabag edited this page Jul 12, 2020 · 1 revision

Examples

Example no. 1

Can be tested on

Extract the First Name of every student into a table with a FirstName column.

Action Rule (JSON)

// action rule
{
    "action": "ExtractFromSource"
}

// extraction rule
{
    "onRootElements": "//td[contains(@id,'student_first_name')]",
    "pageSource": true,
    "onElements": [
        {
            "key": "FirstName"
        }
    ]
}

Rhino Literal

extract from dom on {//td[contains(@id,'student_first_name')]}
    < column {FirstName}

CSharp

// action rule
var actionRule = new ActionRule
{
    Action = PluginsList.ExtractFromSource
};

// extraction rule
var contentEntries = new[]
{
    new ContentEntry { Key = "FirstName" }
};
var extraction = new ExtractionRule
{
    OnRootElements = "//td[contains(@id,'student_first_name')]",
    PageSource = true,
    OnElements = contentEntries
};

Python

# action rule
action_rule = {
    "action": "ExtractFromSource"
}

# extraction rule
content_entries = [
    {
        "key": "FirstName"
    }
]
extraction = {
    "onRootElements": "//td[contains(@id,'student_first_name')]",
    "pageSource": True,
    "onElements": content_entries
}

JavaScript

// action rule: names the action to run (ExtractFromSource)
var actionRule = {
    action: "ExtractFromSource"
};

// extraction rule
// one entry per output column; "key" is the column name
var contentEntries = [
    {
        key: "FirstName"
    }
];
// onElements must reference the contentEntries array declared above
// (JavaScript identifiers are case-sensitive; there is no content_entries here)
var extraction = {
    onRootElements: "//td[contains(@id,'student_first_name')]",
    pageSource: true,
    onElements: contentEntries
};

Java

// action rule
ActionRule actionRule = new ActionRule().setAction("ExtractFromSource");

// extraction rule
ContentEntry[] contentEntries = new EntriesBuilder()
        .addEntries(new ContentEntry().setKey("FirstName"))
        .build();

ExtractionRule extraction = new ExtractionRule()
        .setOnRootElements("//td[contains(@id,'student_first_name')]")
        .setPageSource(true)
        .setOnElements(contentEntries);

Example no. 2

Can be tested on

Extract only the first character of every student's First Name into a table with a FirstName column.

Action Rule (JSON)

// action rule
{
    "action": "ExtractFromSource"
}

// extraction rule
{
    "onRootElements": "//td[contains(@id,'student_first_name')]",
    "pageSource": true,
    "onElements": [
        {
            "key": "FirstName",
            "regularExpression": "^\\w{1}"
        }
    ]
}

Rhino Literal

extract from dom on {//td[contains(@id,'student_first_name')]}
    < column {FirstName} filter {^\w{1}}

CSharp

// action rule: names the action to run (ExtractFromSource)
var actionRule = new ActionRule
{
    Action = PluginsList.ExtractFromSource
};

// extraction rule
// one entry per output column; RegularExpression keeps only the matching
// part of the extracted value (here: the first word character)
var contentEntries = new[]
{
    // object initializers assign members with '=', not ':'
    new ContentEntry { Key = "FirstName", RegularExpression = "^\\w{1}" }
};
var extraction = new ExtractionRule
{
    OnRootElements = "//td[contains(@id,'student_first_name')]",
    PageSource = true,
    OnElements = contentEntries
};

Python

# action rule
action_rule = {
    "action": "ExtractFromSource"
}

# extraction rule
content_entries = [
    {
        "key": "FirstName",
        "regularExpression": "^\\w{1}"
    }
]
extraction = {
    "onRootElements": "//td[contains(@id,'student_first_name')]",
    "pageSource": True,
    "onElements": content_entries
}

JavaScript

// action rule: names the action to run (ExtractFromSource)
var actionRule = {
    action: "ExtractFromSource"
};

// extraction rule
// one entry per output column; regularExpression keeps only the matching
// part of the extracted value (here: the first word character)
var contentEntries = [
    {
        key: "FirstName",
        regularExpression: "^\\w{1}"
    }
];
// onElements must reference the contentEntries array declared above
// (JavaScript identifiers are case-sensitive; there is no content_entries here)
var extraction = {
    onRootElements: "//td[contains(@id,'student_first_name')]",
    pageSource: true,
    onElements: contentEntries
};

Java

// action rule
ActionRule actionRule = new ActionRule().setAction("ExtractFromSource");

// extraction rule
ContentEntry[] contentEntries = new EntriesBuilder()
        .addEntries(
            new ContentEntry()
                    .setKey("FirstName")
                    .setRegularExpression("^\\w{1}"))
        .build();

ExtractionRule extraction = new ExtractionRule()
        .setOnRootElements("//td[contains(@id,'student_first_name')]")
        .setPageSource(true)
        .setOnElements(contentEntries);

Example no. 3

Can be tested on

Extract the HTML id attribute of every student's First Name element into a table with a FirstName column.

Action Rule (JSON)

// action rule
{
    "action": "ExtractFromSource"
}

// extraction rule
{
    "onRootElements": "//td[contains(@id,'student_first_name')]",
    "pageSource": true,
    "onElements": [
        {
            "key": "FirstName",
            "onAttribute": "id"
        }
    ]
}

Rhino Literal

extract from dom on {//td[contains(@id,'student_first_name')]}
    < column {FirstName} from {id}

CSharp

// action rule: names the action to run (ExtractFromSource)
var actionRule = new ActionRule
{
    Action = PluginsList.ExtractFromSource
};

// extraction rule
// one entry per output column; OnAttribute reads the named attribute
// (here: id) instead of the element's text
var contentEntries = new[]
{
    // object initializers assign members with '=', not ':'
    new ContentEntry { Key = "FirstName", OnAttribute = "id" }
};
var extraction = new ExtractionRule
{
    OnRootElements = "//td[contains(@id,'student_first_name')]",
    PageSource = true,
    OnElements = contentEntries
};

Python

# action rule
action_rule = {
    "action": "ExtractFromSource"
}

# extraction rule
content_entries = [
    {
        "key": "FirstName",
        "onAttribute": "id"
    }
]
extraction = {
    "onRootElements": "//td[contains(@id,'student_first_name')]",
    "pageSource": True,
    "onElements": content_entries
}

JavaScript

// action rule: names the action to run (ExtractFromSource)
var actionRule = {
    action: "ExtractFromSource"
};

// extraction rule
// one entry per output column; onAttribute reads the named attribute
// (here: id) instead of the element's text
var contentEntries = [
    {
        key: "FirstName",
        onAttribute: "id"
    }
];
// onElements must reference the contentEntries array declared above
// (JavaScript identifiers are case-sensitive; there is no content_entries here)
var extraction = {
    onRootElements: "//td[contains(@id,'student_first_name')]",
    pageSource: true,
    onElements: contentEntries
};

Java

// action rule
ActionRule actionRule = new ActionRule().setAction("ExtractFromSource");

// extraction rule
ContentEntry[] contentEntries = new EntriesBuilder()
        .addEntries(
            new ContentEntry()
                    .setKey("FirstName")
                    .setOnAttribute("id"))
        .build();

ExtractionRule extraction = new ExtractionRule()
        .setOnRootElements("//td[contains(@id,'student_first_name')]")
        .setPageSource(true)
        .setOnElements(contentEntries);

Example no. 4

Can be tested on

Extract the First Name and Last Name of every student into a table with FirstName and LastName columns.

ℹ️ This approach caches each root element and performs the search within it, using a relative XPath. If you use an absolute XPath, the search is performed at the page level and not within the cached element.

Action Rule (JSON)

// action rule
{
    "action": "ExtractFromSource"
}

// extraction rule
{
    "onRootElements": "//tbody/tr",
    "pageSource": true,
    "onElements": [
        {
            "key": "FirstName",
            "onElement": ".//td[contains(@id,'student_first_name')]"
        },
        {
            "key": "LastName",
            "onElement": ".//td[contains(@id,'student_last_name')]"
        }        
    ]
}

Rhino Literal

extract from dom on {//tbody/tr}
    < column {FirstName} take {.//td[contains(@id,'student_first_name')]}
    < column {LastName} take {.//td[contains(@id,'student_last_name')]}

CSharp

// action rule: names the action to run (ExtractFromSource)
var actionRule = new ActionRule
{
    Action = PluginsList.ExtractFromSource
};

// extraction rule
// one entry per output column; each OnElement is a relative XPath (leading '.')
// so it is resolved inside the current root element rather than the whole page
var contentEntries = new[]
{
    // object initializers assign members with '=', not ':'
    new ContentEntry
    {
        Key = "FirstName",
        OnElement = ".//td[contains(@id,'student_first_name')]"
    },
    new ContentEntry
    {
        Key = "LastName",
        OnElement = ".//td[contains(@id,'student_last_name')]"
    }
};
// every table row is a root element; the entries above are looked up per row
var extraction = new ExtractionRule
{
    OnRootElements = "//tbody/tr",
    PageSource = true,
    OnElements = contentEntries
};

Python

# action rule
action_rule = {
    "action": "ExtractFromSource"
}

# extraction rule
content_entries = [
    {
        "key": "FirstName",
        "onElement": ".//td[contains(@id,'student_first_name')]"
    },
    {
        "key": "LastName",
        "onElement": ".//td[contains(@id,'student_last_name')]"
    } 
]
extraction = {
    "onRootElements": "//tbody/tr",
    "pageSource": True,
    "onElements": content_entries
}

JavaScript

// action rule: names the action to run (ExtractFromSource)
var actionRule = {
    action: "ExtractFromSource"
};

// extraction rule
// one entry per output column; each onElement is a relative XPath (leading '.')
// so it is resolved inside the current root element rather than the whole page
var contentEntries = [
    {
        key: "FirstName",
        onElement: ".//td[contains(@id,'student_first_name')]"
    },
    {
        key: "LastName",
        onElement: ".//td[contains(@id,'student_last_name')]"
    }
];
// every table row is a root element; onElements must reference the
// contentEntries array declared above (there is no content_entries here)
var extraction = {
    onRootElements: "//tbody/tr",
    pageSource: true,
    onElements: contentEntries
};

Java

// action rule
ActionRule actionRule = new ActionRule().setAction("ExtractFromSource");

// extraction rule
ContentEntry[] contentEntries = new EntriesBuilder()
        .addEntries(
            new ContentEntry()
                    .setKey("FirstName")
                    .setOnElement(".//td[contains(@id,'student_first_name')]"),
            new ContentEntry()
                    .setKey("LastName")
                    .setOnElement(".//td[contains(@id,'student_last_name')]"))
        .build();

ExtractionRule extraction = new ExtractionRule()
        .setOnRootElements("//tbody/tr")
        .setPageSource(true)
        .setOnElements(contentEntries);

Example no. 5

Can be tested on

Extract the HTML markup of every student's First Name element into a table with a FirstName column.

Action Rule (JSON)

// action rule
{
    "action": "ExtractFromSource"
}

// extraction rule
{
    "onRootElements": "//td[contains(@id,'student_first_name')]",
    "pageSource": true,
    "onElements": [
        {
            "key": "FirstName",
            "onAttribute": "html"
        }
    ]
}

Rhino Literal

extract from dom on {//td[contains(@id,'student_first_name')]}
    < column {FirstName} from {html}

CSharp

// action rule: names the action to run (ExtractFromSource)
var actionRule = new ActionRule
{
    Action = PluginsList.ExtractFromSource
};

// extraction rule
// one entry per output column; OnAttribute = "html" requests the element's
// HTML markup instead of its text
var contentEntries = new[]
{
    // object initializers assign members with '=', not ':'
    new ContentEntry { Key = "FirstName", OnAttribute = "html" }
};
var extraction = new ExtractionRule
{
    OnRootElements = "//td[contains(@id,'student_first_name')]",
    PageSource = true,
    OnElements = contentEntries
};

Python

# action rule
action_rule = {
    "action": "ExtractFromSource"
}

# extraction rule
content_entries = [
    {
        "key": "FirstName",
        "onAttribute": "html"
    }
]
extraction = {
    "onRootElements": "//td[contains(@id,'student_first_name')]",
    "pageSource": True,
    "onElements": content_entries
}

JavaScript

// action rule: names the action to run (ExtractFromSource)
var actionRule = {
    action: "ExtractFromSource"
};

// extraction rule
// one entry per output column; onAttribute "html" requests the element's
// HTML markup instead of its text
var contentEntries = [
    {
        key: "FirstName",
        onAttribute: "html"
    }
];
// onElements must reference the contentEntries array declared above
// (JavaScript identifiers are case-sensitive; there is no content_entries here)
var extraction = {
    onRootElements: "//td[contains(@id,'student_first_name')]",
    pageSource: true,
    onElements: contentEntries
};

Java

// action rule
ActionRule actionRule = new ActionRule().setAction("ExtractFromSource");

// extraction rule
ContentEntry[] contentEntries = new EntriesBuilder()
        .addEntries(
            new ContentEntry()
                    .setKey("FirstName")
                    .setOnAttribute("html"))
        .build();

ExtractionRule extraction = new ExtractionRule()
        .setOnRootElements("//td[contains(@id,'student_first_name')]")
        .setPageSource(true)
        .setOnElements(contentEntries);

Clone this wiki locally